github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/target.go (about) 1 // Package ais provides core functionality for the AIStore object storage. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ais 6 7 import ( 8 "context" 9 "fmt" 10 "io" 11 "net" 12 "net/http" 13 "net/url" 14 "os" 15 "path/filepath" 16 "strconv" 17 "strings" 18 "sync" 19 "time" 20 21 "github.com/NVIDIA/aistore/ais/backend" 22 "github.com/NVIDIA/aistore/ais/s3" 23 "github.com/NVIDIA/aistore/api/apc" 24 "github.com/NVIDIA/aistore/cmn" 25 "github.com/NVIDIA/aistore/cmn/archive" 26 "github.com/NVIDIA/aistore/cmn/atomic" 27 "github.com/NVIDIA/aistore/cmn/cos" 28 "github.com/NVIDIA/aistore/cmn/debug" 29 "github.com/NVIDIA/aistore/cmn/feat" 30 "github.com/NVIDIA/aistore/cmn/fname" 31 "github.com/NVIDIA/aistore/cmn/kvdb" 32 "github.com/NVIDIA/aistore/cmn/mono" 33 "github.com/NVIDIA/aistore/cmn/nlog" 34 "github.com/NVIDIA/aistore/core" 35 "github.com/NVIDIA/aistore/core/meta" 36 "github.com/NVIDIA/aistore/ec" 37 "github.com/NVIDIA/aistore/ext/dload" 38 "github.com/NVIDIA/aistore/ext/dsort" 39 "github.com/NVIDIA/aistore/ext/etl" 40 "github.com/NVIDIA/aistore/fs" 41 "github.com/NVIDIA/aistore/fs/health" 42 "github.com/NVIDIA/aistore/memsys" 43 "github.com/NVIDIA/aistore/mirror" 44 "github.com/NVIDIA/aistore/reb" 45 "github.com/NVIDIA/aistore/res" 46 "github.com/NVIDIA/aistore/stats" 47 "github.com/NVIDIA/aistore/transport" 48 "github.com/NVIDIA/aistore/volume" 49 "github.com/NVIDIA/aistore/xact/xreg" 50 "github.com/NVIDIA/aistore/xact/xs" 51 ) 52 53 const dbName = "ais.db" 54 55 const clusterClockDrift = 5 * time.Millisecond // is expected to be bounded by 56 57 type ( 58 regstate struct { 59 mu sync.Mutex // serialize metasync Rx, stopping, and transitioning to standby 60 disabled atomic.Bool // true: standing by 61 prevbmd atomic.Bool // special 62 } 63 backends map[string]core.Backend 64 // main 65 target struct { 66 htrun 67 backend backends 68 fshc *health.FSHC 69 fsprg fsprungroup 70 reb *reb.Reb 71 res *res.Res 72 transactions transactions 73 regstate regstate 74 } 75 ) 76 77 type redial struct { 78 t *target 79 dialTout time.Duration 80 totalTout time.Duration 81 inUse string 82 } 83 84 // interface guard 85 var ( 86 _ cos.Runner = (*target)(nil) 87 _ htext = (*target)(nil) 88 ) 89 90 func (*target) Name() string { return apc.Target } // as cos.Runner 91 92 // as htext 93 func (*target) interruptedRestarted() (interrupted, restarted bool) { 94 interrupted = fs.MarkerExists(fname.RebalanceMarker) 95 restarted = fs.MarkerExists(fname.NodeRestartedPrev) 96 return 97 } 98 99 // 100 // target 101 // 102 103 func (t *target) initBackends() { 104 config := cmn.GCO.Get() 105 aisbp := backend.NewAIS(t) 106 t.backend[apc.AIS] = aisbp // always present 107 t.backend[apc.HTTP] = backend.NewHTTP(t, config) // ditto 108 109 if aisConf := config.Backend.Get(apc.AIS); aisConf != nil { 110 if err := aisbp.Apply(aisConf, "init", &config.ClusterConfig); err != nil { 111 nlog.Errorln(t.String()+":", err, "- proceeding to start anyway") 112 } else { 113 nlog.Infoln(t.String()+": remote-ais", aisConf) 114 } 115 } 116 117 if err := t._initBuiltin(); err != nil { 118 cos.ExitLog(err) 119 } 120 } 121 122 // init built-in (via build tags) backends 123 // - remote (e.g. cloud) backends w/ empty stubs unless populated via build tags 124 // - enabled/disabled via config.Backend 125 func (t *target) _initBuiltin() error { 126 var ( 127 enabled, disabled, notlinked []string 128 config = cmn.GCO.Get() 129 ) 130 for provider := range apc.Providers { 131 var ( 132 add core.Backend 133 err error 134 ) 135 switch provider { 136 case apc.AWS: 137 add, err = backend.NewAWS(t) 138 case apc.GCP: 139 add, err = backend.NewGCP(t) 140 case apc.Azure: 141 add, err = backend.NewAzure(t) 142 case apc.AIS, apc.HTTP: 143 continue 144 default: 145 return fmt.Errorf(cmn.FmtErrUnknown, t, "backend provider", provider) 146 } 147 t.backend[provider] = add 148 149 configured := config.Backend.Get(provider) != nil 150 switch { 151 case err == nil && configured: 152 enabled = append(enabled, provider) 153 case err == nil && !configured: 154 disabled = append(disabled, provider) 155 case err != nil && configured: 156 notlinked = append(notlinked, provider) 157 } 158 } 159 switch { 160 case len(notlinked) > 0: 161 return fmt.Errorf("%s backends: enabled %v, disabled %v, missing in the build %v", t, enabled, disabled, notlinked) 162 case len(disabled) > 0: 163 nlog.Warningf("%s backends: enabled %v, disabled %v", t, enabled, disabled) 164 default: 165 nlog.Infoln(t.String(), "backends:", enabled) 166 } 167 return nil 168 } 169 170 func (t *target) aisbp() *backend.AISbp { 171 bendp := t.backend[apc.AIS] 172 return bendp.(*backend.AISbp) 173 } 174 175 func (t *target) init(config *cmn.Config) { 176 t.initSnode(config) 177 178 // (a) get node ID from command-line or env var (see envDaemonID()) 179 // (b) load existing node ID (replicated xattr at roots of respective mountpaths) 180 // (c) generate a new one (genDaemonID()) 181 // - in that exact sequence 182 tid, generated := initTID(config) 183 if generated && len(config.FSP.Paths) > 0 { 184 var recovered bool 185 // in an unlikely event when losing all mountpath-stored IDs but still having a volume 186 tid, recovered = volume.RecoverTID(tid, config.FSP.Paths) 187 generated = !recovered 188 189 // TODO: generated == true will not sit well with loading a local copy of Smap 190 // later on during startup sequence - and not finding _this_ target in it 191 } 192 t.si.Init(tid, apc.Target) 193 194 cos.InitShortID(t.si.Digest()) 195 196 memsys.Init(t.SID(), t.SID(), config) 197 198 // new fs, check and add mountpaths 199 vini := volume.IniCtx{ 200 UseLoopbacks: daemon.cli.target.useLoopbackDevs, 201 IgnoreMissing: daemon.cli.target.startWithLostMountpath, 202 RandomTID: generated, 203 } 204 newVol := volume.Init(t, config, vini) 205 fs.ComputeDiskSize() 206 207 t.initHostIP(config) 208 daemon.rg.add(t) 209 210 ts := stats.NewTrunner(t) // iostat below 211 startedUp := ts.Init(t) // reg common metrics (and target-only - via RegMetrics/regDiskMetrics below) 212 daemon.rg.add(ts) 213 t.statsT = ts // stats tracker 214 215 k := newTalive(t, ts, startedUp) 216 daemon.rg.add(k) 217 t.keepalive = k 218 219 t.fsprg.init(t, newVol) // subgroup of the daemon.rg rungroup 220 221 sc := transport.Init(ts, config) // init transport sub-system; new stream collector 222 daemon.rg.add(sc) 223 224 fshc := health.NewFSHC(t) 225 daemon.rg.add(fshc) 226 t.fshc = fshc 227 228 if err := ts.InitCDF(); err != nil { 229 cos.ExitLog(err) 230 } 231 fs.Clblk() 232 } 233 234 func (t *target) initHostIP(config *cmn.Config) { 235 hostIP := os.Getenv("AIS_HOST_IP") 236 if hostIP == "" { 237 return 238 } 239 extAddr := net.ParseIP(hostIP) 240 cos.AssertMsg(extAddr != nil, "invalid public IP addr via 'AIS_HOST_IP' env: "+hostIP) 241 242 extPort := config.HostNet.Port 243 if portStr := os.Getenv("AIS_HOST_PORT"); portStr != "" { 244 portNum, err := cmn.ParsePort(portStr) 245 cos.AssertNoErr(err) 246 extPort = portNum 247 } 248 t.si.PubNet.Hostname = extAddr.String() 249 t.si.PubNet.Port = strconv.Itoa(extPort) 250 t.si.PubNet.URL = fmt.Sprintf("%s://%s:%d", config.Net.HTTP.Proto, extAddr.String(), extPort) 251 252 nlog.Infoln("AIS_HOST_IP:", hostIP, "pub:", t.si.URL(cmn.NetPublic)) 253 254 // applies to intra-cluster networks unless separately defined 255 if !config.HostNet.UseIntraControl { 256 t.si.ControlNet = t.si.PubNet 257 } 258 if !config.HostNet.UseIntraData { 259 t.si.DataNet = t.si.PubNet 260 } 261 } 262 263 func initTID(config *cmn.Config) (tid string, generated bool) { 264 if tid = envDaemonID(apc.Target); tid != "" { 265 if err := cos.ValidateDaemonID(tid); err != nil { 266 nlog.Errorln("Warning:", err) 267 } 268 return tid, false 269 } 270 271 var err error 272 if tid, err = fs.LoadNodeID(config.FSP.Paths); err != nil { 273 cos.ExitLog(err) // FATAL 274 } 275 if tid != "" { 276 return tid, false 277 } 278 279 // this target: generate random ID 280 tid = genDaemonID(apc.Target, config) 281 err = cos.ValidateDaemonID(tid) 282 debug.AssertNoErr(err) 283 nlog.Infoln(meta.Tname(tid) + ": ID randomly generated") 284 return tid, true 285 } 286 287 func regDiskMetrics(node *meta.Snode, tstats *stats.Trunner, mpi fs.MPI) { 288 for _, mi := range mpi { 289 for _, disk := range mi.Disks { 290 tstats.RegDiskMetrics(node, disk) 291 } 292 } 293 } 294 295 func (t *target) Run() error { 296 if err := t.si.Validate(); err != nil { 297 cos.ExitLog(err) 298 } 299 config := cmn.GCO.Get() 300 t.htrun.init(config) 301 302 tstats := t.statsT.(*stats.Trunner) 303 304 core.Tinit(t, tstats, true /*run hk*/) 305 306 // metrics, disks first 307 availablePaths, disabledPaths := fs.Get() 308 if len(availablePaths) == 0 { 309 cos.ExitLog(cmn.ErrNoMountpaths) 310 } 311 regDiskMetrics(t.si, tstats, availablePaths) 312 regDiskMetrics(t.si, tstats, disabledPaths) 313 t.statsT.RegMetrics(t.si) // + Prometheus, if configured 314 315 fatalErr, writeErr := t.checkRestarted(config) 316 if fatalErr != nil { 317 cos.ExitLog(fatalErr) 318 } 319 if writeErr != nil { 320 nlog.Errorln("") 321 nlog.Errorln(writeErr) 322 nlog.Errorln("") 323 } 324 325 // register object type and workfile type 326 fs.CSM.Reg(fs.ObjectType, &fs.ObjectContentResolver{}) 327 fs.CSM.Reg(fs.WorkfileType, &fs.WorkfileContentResolver{}) 328 329 // Init meta-owners and load local instances 330 if prev := t.owner.bmd.init(); prev { 331 t.regstate.prevbmd.Store(true) 332 } 333 t.owner.etl.init() 334 335 smap, reliable := t.loadSmap() 336 if !reliable { 337 smap = newSmap() 338 smap.Tmap[t.SID()] = t.si // add self to initial temp smap 339 } else { 340 nlog.Infoln(t.String()+": loaded", smap.StringEx()) 341 } 342 t.owner.smap.put(smap) 343 344 if daemon.cli.target.standby { 345 tstats.Standby(true) 346 t.regstate.disabled.Store(true) 347 nlog.Warningln(t.String(), "not joining - standing by") 348 349 // see endStartupStandby() 350 } else { 351 // discover primary and join cluster (compare with manual `apc.AdminJoin`) 352 if status, err := t.joinCluster(apc.ActSelfJoinTarget); err != nil { 353 nlog.Errorf("%s failed to join cluster: %v(%d)", t, err, status) 354 nlog.Errorln(t.String(), "terminating") 355 return err 356 } 357 t.markNodeStarted() 358 go t.gojoin(config) 359 } 360 361 t.initBackends() 362 363 db, err := kvdb.NewBuntDB(filepath.Join(config.ConfigDir, dbName)) 364 if err != nil { 365 nlog.Errorln(t.String(), "failed to initialize kvdb:", err) 366 return err 367 } 368 369 t.transactions.init(t) 370 371 t.reb = reb.New(config) 372 t.res = res.New() 373 374 // register storage target's handler(s) and start listening 375 t.initRecvHandlers() 376 377 ec.Init() 378 mirror.Init() 379 380 xreg.RegWithHK() 381 382 marked := xreg.GetResilverMarked() 383 if marked.Interrupted || daemon.resilver.required { 384 go t.goresilver(marked.Interrupted) 385 } 386 387 dsort.Tinit(t.statsT, db, config) 388 dload.Init(t.statsT, db, &config.Client) 389 390 err = t.htrun.run(config) 391 392 etl.StopAll() // stop all running ETLs if any 393 cos.Close(db) // close kv db 394 fs.RemoveMarker(fname.NodeRestartedMarker) // exit gracefully 395 return err 396 } 397 398 // apart from minor (albeit subtle) differences between `t.joinCluster` vs `p.joinCluster` 399 // this method is otherwise identical to t.gojoin (TODO: unify) 400 func (t *target) gojoin(config *cmn.Config) { 401 smap := t.owner.smap.get() 402 cii := t.pollClusterStarted(config, smap.Primary) 403 if nlog.Stopping() { 404 return 405 } 406 407 if cii != nil { 408 // (primary changed) 409 primary := cii.Smap.Primary 410 if status, err := t.joinCluster(apc.ActSelfJoinTarget, primary.CtrlURL, primary.PubURL); err != nil { 411 nlog.Errorf(fmtFailedRejoin, t, err, status) 412 return 413 } 414 } 415 t.markClusterStarted() 416 417 if t.fsprg.newVol && !config.TestingEnv() { 418 config := cmn.GCO.BeginUpdate() 419 fspathsSave(config) 420 } 421 nlog.Infoln(t.String(), "is ready") 422 } 423 424 func (t *target) goresilver(interrupted bool) { 425 if interrupted { 426 nlog.Infoln("Resuming resilver...") 427 } else if daemon.resilver.required { 428 nlog.Infof("Starting resilver, reason: %q", daemon.resilver.reason) 429 } 430 t.runResilver(res.Args{}, nil /*wg*/) 431 } 432 433 func (t *target) runResilver(args res.Args, wg *sync.WaitGroup) { 434 // with no cluster-wide UUID it's a local run 435 if args.UUID == "" { 436 args.UUID = cos.GenUUID() 437 regMsg := xactRegMsg{UUID: args.UUID, Kind: apc.ActResilver, Srcs: []string{t.SID()}} 438 msg := t.newAmsgActVal(apc.ActRegGlobalXaction, regMsg) 439 t.bcastAsyncIC(msg) 440 } 441 if wg != nil { 442 wg.Done() // compare w/ xact.GoRunW(() 443 } 444 t.res.RunResilver(args) 445 } 446 447 func (t *target) endStartupStandby() (err error) { 448 smap := t.owner.smap.get() 449 if err = smap.validate(); err != nil { 450 return 451 } 452 daemon.cli.target.standby = false 453 t.markNodeStarted() 454 t.markClusterStarted() 455 t.regstate.disabled.Store(false) 456 tstats := t.statsT.(*stats.Trunner) 457 tstats.Standby(false) 458 nlog.Infof("%s enabled and joined (%s)", t, smap.StringEx()) 459 460 config := cmn.GCO.Get() 461 if t.fsprg.newVol && !config.TestingEnv() { 462 config = cmn.GCO.BeginUpdate() 463 fspathsSave(config) 464 } 465 return 466 } 467 468 func (t *target) initRecvHandlers() { 469 networkHandlers := []networkHandler{ 470 {r: apc.Buckets, h: t.bucketHandler, net: accessNetAll}, 471 {r: apc.Objects, h: t.objectHandler, net: accessNetAll}, 472 {r: apc.Daemon, h: t.daemonHandler, net: accessNetPublicControl}, 473 {r: apc.Metasync, h: t.metasyncHandler, net: accessNetIntraControl}, 474 {r: apc.Health, h: t.healthHandler, net: accessNetPublicControl}, 475 {r: apc.Xactions, h: t.xactHandler, net: accessNetIntraControl}, 476 {r: apc.EC, h: t.ecHandler, net: accessNetIntraData}, 477 {r: apc.Vote, h: t.voteHandler, net: accessNetIntraControl}, 478 {r: apc.Txn, h: t.txnHandler, net: accessNetIntraControl}, 479 {r: apc.ObjStream, h: transport.RxAnyStream, net: accessControlData}, 480 481 {r: apc.Download, h: t.downloadHandler, net: accessNetIntraControl}, 482 {r: apc.Sort, h: dsort.TargetHandler, net: accessControlData}, 483 {r: apc.ETL, h: t.etlHandler, net: accessNetAll}, 484 485 {r: "/" + apc.S3, h: t.s3Handler, net: accessNetPublicData}, 486 {r: "/", h: t.errURL, net: accessNetAll}, 487 } 488 t.regNetHandlers(networkHandlers) 489 } 490 491 func (t *target) checkRestarted(config *cmn.Config) (fatalErr, writeErr error) { 492 if fs.MarkerExists(fname.NodeRestartedMarker) { 493 red := redial{t: t, dialTout: config.Timeout.CplaneOperation.D(), totalTout: config.Timeout.MaxKeepalive.D()} 494 if red.acked() { 495 fatalErr = fmt.Errorf("%s: %q is in use (duplicate or overlapping run?)", t, red.inUse) 496 return 497 } 498 t.statsT.Inc(stats.RestartCount) 499 fs.PersistMarker(fname.NodeRestartedPrev) 500 } 501 fatalErr, writeErr = fs.PersistMarker(fname.NodeRestartedMarker) 502 return 503 } 504 505 // NOTE in re 'node-restarted' scenario: the risk of "overlapping" aisnode run - 506 // which'll fail shortly with "bind: address already in use" but not before 507 // triggering (`NodeRestartedPrev` => GFN) sequence and stealing nlog symlinks 508 // - this risk exists, and that's why we go extra length 509 func (red *redial) acked() bool { 510 var ( 511 err error 512 tsi = red.t.si 513 sleep = cos.ProbingFrequency(red.totalTout) 514 addrs = []string{tsi.PubNet.TCPEndpoint()} 515 once bool 516 ) 517 if ep := red.t.si.DataNet.TCPEndpoint(); ep != addrs[0] { 518 addrs = append(addrs, ep) 519 } else if ep := red.t.si.ControlNet.TCPEndpoint(); ep != addrs[0] { 520 addrs = append(addrs, ep) 521 } 522 for _, addr := range addrs { 523 for elapsed := time.Duration(0); elapsed < red.totalTout; elapsed += sleep { 524 _, err = net.DialTimeout("tcp4", addr, max(2*time.Second, red.dialTout)) 525 if err != nil { 526 break 527 } 528 once = true 529 time.Sleep(sleep) 530 // could be shutting down 531 } 532 if !once { 533 return false 534 } 535 if err == nil { 536 if red.inUse == "" { 537 red.inUse = addr 538 } 539 return true 540 } 541 time.Sleep(sleep) 542 } 543 return false // got tcp synack at least once but not (getting it) any longer 544 } 545 546 // 547 // http handlers 548 // 549 550 func (t *target) errURL(w http.ResponseWriter, r *http.Request) { 551 if r.URL.Scheme != "" { 552 t.writeErrURL(w, r) 553 return 554 } 555 path := r.URL.Path 556 if path != "" && path[0] == '/' { 557 path = path[1:] 558 } 559 split := strings.Split(path, "/") 560 // "easy URL" 561 if len(split) > 0 && 562 (split[0] == apc.GSScheme || split[0] == apc.AZScheme || split[0] == apc.AISScheme) { 563 t.writeErrMsg(w, r, "trying to execute \"easy URL\" via AIS target? (hint: use proxy)") 564 } else { 565 t.writeErrURL(w, r) 566 } 567 } 568 569 // verb /v1/buckets 570 func (t *target) bucketHandler(w http.ResponseWriter, r *http.Request) { 571 switch r.Method { 572 case http.MethodGet: 573 dpq := dpqAlloc() 574 t.httpbckget(w, r, dpq) 575 dpqFree(dpq) 576 case http.MethodDelete: 577 apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false) 578 t.httpbckdelete(w, r, apireq) 579 apiReqFree(apireq) 580 case http.MethodPost: 581 apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false) 582 t.httpbckpost(w, r, apireq) 583 apiReqFree(apireq) 584 case http.MethodHead: 585 apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false) 586 t.httpbckhead(w, r, apireq) 587 apiReqFree(apireq) 588 default: 589 cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead, http.MethodPost) 590 } 591 } 592 593 // verb /v1/objects 594 func (t *target) objectHandler(w http.ResponseWriter, r *http.Request) { 595 switch r.Method { 596 case http.MethodGet: 597 apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/) 598 t.httpobjget(w, r, apireq) 599 apiReqFree(apireq) 600 case http.MethodHead: 601 apireq := apiReqAlloc(2, apc.URLPathObjects.L, false) 602 t.httpobjhead(w, r, apireq) 603 apiReqFree(apireq) 604 case http.MethodPut: 605 apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/) 606 if err := t.parseReq(w, r, apireq); err == nil { 607 lom := core.AllocLOM(apireq.items[1]) 608 t.httpobjput(w, r, apireq, lom) 609 core.FreeLOM(lom) 610 } 611 apiReqFree(apireq) 612 case http.MethodDelete: 613 apireq := apiReqAlloc(2, apc.URLPathObjects.L, false) 614 t.httpobjdelete(w, r, apireq) 615 apiReqFree(apireq) 616 case http.MethodPost: 617 apireq := apiReqAlloc(2, apc.URLPathObjects.L, false /*useDpq*/) 618 t.httpobjpost(w, r, apireq) 619 apiReqFree(apireq) 620 case http.MethodPatch: 621 apireq := apiReqAlloc(2, apc.URLPathObjects.L, false) 622 t.httpobjpatch(w, r, apireq) 623 apiReqFree(apireq) 624 default: 625 cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead, 626 http.MethodPost, http.MethodPut) 627 } 628 } 629 630 // verb /v1/slices 631 // Non-public inerface 632 func (t *target) ecHandler(w http.ResponseWriter, r *http.Request) { 633 switch r.Method { 634 case http.MethodGet: 635 t.httpecget(w, r) 636 default: 637 cmn.WriteErr405(w, r, http.MethodGet) 638 } 639 } 640 641 // 642 // httpobj* handlers 643 // 644 645 // GET /v1/objects/<bucket-name>/<object-name> 646 // 647 // Initially validates if the request is internal request (either from proxy 648 // or target) and calls getObject. 649 // 650 // Checks if the object exists locally (if not, downloads it) and sends it back 651 // If the bucket is in the Cloud one and ValidateWarmGet is enabled there is an extra 652 // check whether the object exists locally. Version is checked as well if configured. 653 func (t *target) httpobjget(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 654 err := t.parseReq(w, r, apireq) 655 if err != nil { 656 return 657 } 658 err = apireq.dpq.parse(r.URL.RawQuery) 659 if err != nil { 660 debug.AssertNoErr(err) 661 t.writeErr(w, r, err) 662 return 663 } 664 if cmn.Rom.Features().IsSet(feat.EnforceIntraClusterAccess) { 665 if apireq.dpq.ptime == "" /*isRedirect*/ && t.isIntraCall(r.Header, false /*from primary*/) != nil { 666 t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected (remaddr=%s)", 667 t.si, r.Method, r.RemoteAddr) 668 return 669 } 670 } 671 672 lom := core.AllocLOM(apireq.items[1]) 673 lom, err = t.getObject(w, r, apireq.dpq, apireq.bck, lom) 674 if err != nil { 675 t._erris(w, r, apireq.dpq.silent, err, 0) 676 } 677 core.FreeLOM(lom) 678 } 679 680 func (t *target) getObject(w http.ResponseWriter, r *http.Request, dpq *dpq, bck *meta.Bck, lom *core.LOM) (*core.LOM, error) { 681 if err := lom.InitBck(bck.Bucket()); err != nil { 682 if cmn.IsErrRemoteBckNotFound(err) { 683 t.BMDVersionFixup(r) 684 err = lom.InitBck(bck.Bucket()) 685 } 686 if err != nil { 687 return lom, err 688 } 689 } 690 691 // two special flows 692 if dpq.etlName != "" { 693 t.getETL(w, r, dpq.etlName, bck, lom.ObjName) 694 return lom, nil 695 } 696 if cos.IsParseBool(r.Header.Get(apc.HdrBlobDownload)) { 697 var msg apc.BlobMsg 698 if err := msg.FromHeader(r.Header); err != nil { 699 return lom, err 700 } 701 702 // NOTE: make a blocking call w/ simultaneous Tx 703 args := &core.BlobParams{ 704 RspW: w, 705 Lom: lom, 706 Msg: &msg, 707 } 708 _, _, err := t.blobdl(args, nil /*oa*/) 709 return lom, err 710 } 711 712 // GET: regular | archive | range 713 goi := allocGOI() 714 { 715 goi.atime = time.Now().UnixNano() 716 goi.ltime = mono.NanoTime() 717 if dpq.ptime != "" { 718 if d := ptLatency(goi.atime, dpq.ptime, r.Header.Get(apc.HdrCallerIsPrimary)); d > 0 { 719 t.statsT.Add(stats.GetRedirLatency, d) 720 } 721 } 722 goi.t = t 723 goi.lom = lom 724 goi.dpq = dpq 725 goi.req = r 726 goi.w = w 727 goi.ctx = context.Background() 728 goi.ranges = byteRanges{Range: r.Header.Get(cos.HdrRange), Size: 0} 729 goi.latestVer = _validateWarmGet(goi.lom, dpq.latestVer) // apc.QparamLatestVer || versioning.*_warm_get 730 } 731 if dpq.isArch() { 732 if goi.ranges.Range != "" { 733 details := fmt.Sprintf("range: %s, arch query: %s", goi.ranges.Range, goi.dpq._archstr()) 734 return lom, cmn.NewErrUnsupp("range-read archived content", details) 735 } 736 if dpq.arch.path != "" { 737 if strings.HasPrefix(dpq.arch.path, lom.ObjName) { 738 if rel, err := filepath.Rel(lom.ObjName, dpq.arch.path); err == nil { 739 dpq.arch.path = rel 740 } 741 } 742 } 743 } 744 745 // apc.QparamOrigURL 746 if bck.IsHTTP() { 747 originalURL := dpq.origURL 748 goi.ctx = context.WithValue(goi.ctx, cos.CtxOriginalURL, originalURL) 749 } 750 751 // do 752 if ecode, err := goi.getObject(); err != nil { 753 t.statsT.IncErr(stats.GetCount) 754 755 // handle right here, return nil 756 if err != errSendingResp { 757 if dpq.isS3 { 758 s3.WriteErr(w, r, err, ecode) 759 } else { 760 if ecode == http.StatusNotFound { 761 dpq.silent = true 762 } 763 t._erris(w, r, dpq.silent, err, ecode) 764 } 765 } 766 } 767 lom = goi.lom 768 freeGOI(goi) 769 return lom, nil 770 } 771 772 func _validateWarmGet(lom *core.LOM, latestVer bool /*apc.QparamLatestVer*/) bool { 773 switch { 774 case !lom.Bck().IsCloud() && !lom.Bck().IsRemoteAIS(): 775 return false 776 case !latestVer: 777 return lom.VersionConf().ValidateWarmGet || lom.VersionConf().Sync // bucket prop 778 default: 779 return true 780 } 781 } 782 783 // err in silence 784 func (t *target) _erris(w http.ResponseWriter, r *http.Request, silent bool /*apc.QparamSilent*/, err error, code int) { 785 if silent { 786 t.writeErr(w, r, err, code, Silent) 787 } else { 788 t.writeErr(w, r, err, code) 789 } 790 } 791 792 // PUT /v1/objects/bucket-name/object-name; does: 793 // 1) append object 2) append to archive 3) PUT 794 func (t *target) httpobjput(w http.ResponseWriter, r *http.Request, apireq *apiRequest, lom *core.LOM) { 795 var ( 796 config = cmn.GCO.Get() 797 started = time.Now().UnixNano() 798 t2tput = isT2TPut(r.Header) 799 ) 800 if !t.isValidObjname(w, r, lom.ObjName) { 801 return 802 } 803 if apireq.dpq.ptime == "" && !t2tput { 804 t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected or replicated", t.si, r.Method) 805 return 806 } 807 cs := fs.Cap() 808 if errCap := cs.Err(); errCap != nil || cs.PctMax > int32(config.Space.CleanupWM) { 809 cs = t.OOS(nil) 810 if cs.IsOOS() { 811 // fail this write 812 t.writeErr(w, r, errCap, http.StatusInsufficientStorage) 813 return 814 } 815 } 816 817 // init 818 if err := lom.InitBck(apireq.bck.Bucket()); err != nil { 819 if cmn.IsErrRemoteBckNotFound(err) { 820 t.BMDVersionFixup(r) 821 err = lom.InitBck(apireq.bck.Bucket()) 822 } 823 if err != nil { 824 t.writeErr(w, r, err) 825 return 826 } 827 } 828 829 // load (maybe) 830 skipVC := lom.IsFeatureSet(feat.SkipVC) || apireq.dpq.skipVC 831 if !skipVC { 832 _ = lom.Load(true, false) 833 } 834 835 // do 836 var ( 837 handle string 838 err error 839 ecode int 840 ) 841 switch { 842 case apireq.dpq.arch.path != "": // apc.QparamArchpath 843 apireq.dpq.arch.mime, err = archive.MimeFQN(t.smm, apireq.dpq.arch.mime, lom.FQN) 844 if err != nil { 845 break 846 } 847 // do 848 lom.Lock(true) 849 ecode, err = t.putApndArch(r, lom, started, apireq.dpq) 850 lom.Unlock(true) 851 case apireq.dpq.apnd.ty != "": // apc.QparamAppendType 852 a := &apndOI{ 853 started: started, 854 t: t, 855 config: config, 856 lom: lom, 857 r: r.Body, 858 op: apireq.dpq.apnd.ty, // apc.QparamAppendType 859 } 860 if err := a.parse(apireq.dpq.apnd.hdl /*apc.QparamAppendHandle*/); err != nil { 861 t.writeErr(w, r, err) 862 return 863 } 864 handle, ecode, err = a.do(r) 865 if err == nil && handle != "" { 866 w.Header().Set(apc.HdrAppendHandle, handle) 867 return 868 } 869 t.statsT.IncErr(stats.AppendCount) 870 default: 871 poi := allocPOI() 872 { 873 poi.atime = started 874 if apireq.dpq.ptime != "" { 875 if d := ptLatency(poi.atime, apireq.dpq.ptime, r.Header.Get(apc.HdrCallerIsPrimary)); d > 0 { 876 t.statsT.Add(stats.PutRedirLatency, d) 877 } 878 } 879 poi.t = t 880 poi.lom = lom 881 poi.config = config 882 poi.skipVC = skipVC // feat.SkipVC || apc.QparamSkipVC 883 poi.restful = true 884 poi.t2t = t2tput 885 } 886 ecode, err = poi.do(w.Header(), r, apireq.dpq) 887 freePOI(poi) 888 } 889 if err != nil { 890 t.fsErr(err, lom.FQN) 891 t.writeErr(w, r, err, ecode) 892 } 893 } 894 895 // DELETE [ { action } ] /v1/objects/bucket-name/object-name 896 func (t *target) httpobjdelete(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 897 var msg aisMsg 898 if err := readJSON(w, r, &msg); err != nil { 899 return 900 } 901 if err := t.parseReq(w, r, apireq); err != nil { 902 return 903 } 904 objName := apireq.items[1] 905 if !t.isValidObjname(w, r, objName) { 906 return 907 } 908 if isRedirect(apireq.query) == "" { 909 t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected", t.si, r.Method) 910 return 911 } 912 913 evict := msg.Action == apc.ActEvictObjects 914 lom := core.AllocLOM(objName) 915 if err := lom.InitBck(apireq.bck.Bucket()); err != nil { 916 t.writeErr(w, r, err) 917 core.FreeLOM(lom) 918 return 919 } 920 921 ecode, err := t.DeleteObject(lom, evict) 922 if err == nil && ecode == 0 { 923 // EC cleanup if EC is enabled 924 ec.ECM.CleanupObject(lom) 925 } else { 926 if ecode == http.StatusNotFound { 927 t.writeErrSilentf(w, r, http.StatusNotFound, "%s doesn't exist", lom.Cname()) 928 } else { 929 t.writeErr(w, r, err, ecode) 930 } 931 } 932 core.FreeLOM(lom) 933 } 934 935 // POST /v1/objects/bucket-name/object-name 936 func (t *target) httpobjpost(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 937 msg, err := t.readActionMsg(w, r) 938 if err != nil { 939 return 940 } 941 if msg.Action == apc.ActBlobDl { 942 apireq.after = 1 943 } 944 if t.parseReq(w, r, apireq) != nil { 945 return 946 } 947 if isRedirect(apireq.query) == "" { 948 t.writeErrf(w, r, "%s: %s-%s(obj) is expected to be redirected", t.si, r.Method, msg.Action) 949 return 950 } 951 var lom *core.LOM 952 switch msg.Action { 953 case apc.ActRenameObject: 954 lom = core.AllocLOM(apireq.items[1]) 955 if err = lom.InitBck(apireq.bck.Bucket()); err != nil { 956 break 957 } 958 if err = t.objMv(lom, msg); err == nil { 959 t.statsT.Inc(stats.RenameCount) 960 core.FreeLOM(lom) 961 lom = nil 962 } else { 963 t.statsT.IncErr(stats.RenameCount) 964 } 965 case apc.ActBlobDl: 966 var ( 967 xid string 968 objName = msg.Name 969 blobMsg apc.BlobMsg 970 ) 971 lom = core.AllocLOM(objName) 972 if err = lom.InitBck(apireq.bck.Bucket()); err != nil { 973 break 974 } 975 if err = cos.MorphMarshal(msg.Value, &blobMsg); err != nil { 976 err = fmt.Errorf(cmn.FmtErrMorphUnmarshal, t, "set-custom", msg.Value, err) 977 break 978 } 979 args := &core.BlobParams{ 980 Lom: lom, 981 Msg: &blobMsg, 982 } 983 if xid, _, err = t.blobdl(args, nil /*oa*/); xid != "" { 984 debug.AssertNoErr(err) 985 w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(xid))) 986 w.Write([]byte(xid)) 987 // lom is eventually freed by x-blob 988 } 989 default: 990 t.writeErrAct(w, r, msg.Action) 991 return 992 } 993 if err != nil { 994 t.writeErr(w, r, err) 995 core.FreeLOM(lom) 996 } 997 } 998 999 // HEAD /v1/objects/<bucket-name>/<object-name> 1000 func (t *target) httpobjhead(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 1001 if err := t.parseReq(w, r, apireq); err != nil { 1002 return 1003 } 1004 query, bck, objName := apireq.query, apireq.bck, apireq.items[1] 1005 if cmn.Rom.Features().IsSet(feat.EnforceIntraClusterAccess) { 1006 // validates that the request is internal (by a node in the same cluster) 1007 if isRedirect(query) == "" && t.isIntraCall(r.Header, false) != nil { 1008 t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected (remaddr=%s)", 1009 t.si, r.Method, r.RemoteAddr) 1010 return 1011 } 1012 } 1013 lom := core.AllocLOM(objName) 1014 ecode, err := t.objHead(w.Header(), query, bck, lom) 1015 core.FreeLOM(lom) 1016 if err != nil { 1017 t._erris(w, r, cos.IsParseBool(query.Get(apc.QparamSilent)), err, ecode) 1018 } 1019 } 1020 1021 func (t *target) objHead(hdr http.Header, query url.Values, bck *meta.Bck, lom *core.LOM) (ecode int, err error) { 1022 var ( 1023 fltPresence int 1024 exists = true 1025 hasEC bool 1026 ) 1027 if tmp := query.Get(apc.QparamFltPresence); tmp != "" { 1028 var erp error 1029 fltPresence, erp = strconv.Atoi(tmp) 1030 debug.AssertNoErr(erp) 1031 } 1032 if err = lom.InitBck(bck.Bucket()); err != nil { 1033 if cmn.IsErrBucketNought(err) { 1034 ecode = http.StatusNotFound 1035 } 1036 return 1037 } 1038 err = lom.Load(true /*cache it*/, false /*locked*/) 1039 if err == nil { 1040 if apc.IsFltNoProps(fltPresence) { 1041 return 1042 } 1043 if fltPresence == apc.FltExistsOutside { 1044 err = fmt.Errorf(fmtOutside, lom.Cname(), fltPresence) 1045 return 1046 } 1047 } else { 1048 if !cmn.IsErrObjNought(err) { 1049 return 1050 } 1051 exists = false 1052 if fltPresence == apc.FltPresentCluster { 1053 exists = lom.RestoreToLocation() 1054 } 1055 } 1056 1057 if !exists { 1058 if bck.IsAIS() || apc.IsFltPresent(fltPresence) { 1059 err = cos.NewErrNotFound(t, lom.Cname()) 1060 return http.StatusNotFound, err 1061 } 1062 } 1063 1064 // props 1065 op := cmn.ObjectProps{Name: lom.ObjName, Bck: *lom.Bucket(), Present: exists} 1066 if exists { 1067 op.ObjAttrs = *lom.ObjAttrs() 1068 op.Location = lom.Location() 1069 op.Mirror.Copies = lom.NumCopies() 1070 if lom.HasCopies() { 1071 lom.Lock(false) 1072 for fs := range lom.GetCopies() { 1073 if idx := strings.Index(fs, "/@"); idx >= 0 { 1074 fs = fs[:idx] 1075 } 1076 op.Mirror.Paths = append(op.Mirror.Paths, fs) 1077 } 1078 lom.Unlock(false) 1079 } else { 1080 fs := lom.FQN 1081 if idx := strings.Index(fs, "/@"); idx >= 0 { 1082 fs = fs[:idx] 1083 } 1084 op.Mirror.Paths = append(op.Mirror.Paths, fs) 1085 } 1086 if lom.ECEnabled() { 1087 if md, err := ec.ObjectMetadata(lom.Bck(), lom.ObjName); err == nil { 1088 hasEC = true 1089 op.EC.DataSlices = md.Data 1090 op.EC.ParitySlices = md.Parity 1091 op.EC.IsECCopy = md.IsCopy 1092 op.EC.Generation = md.Generation 1093 } 1094 } 1095 } else { 1096 // cold HEAD 1097 var oa *cmn.ObjAttrs 1098 oa, ecode, err = t.Backend(lom.Bck()).HeadObj(context.Background(), lom, nil /*origReq*/) 1099 if err != nil { 1100 if ecode != http.StatusNotFound { 1101 err = cmn.NewErrFailedTo(t, "HEAD", lom.Cname(), err) 1102 } 1103 return 1104 } 1105 if apc.IsFltNoProps(fltPresence) { 1106 return 1107 } 1108 op.ObjAttrs = *oa 1109 op.ObjAttrs.Atime = 0 1110 } 1111 1112 // to header 1113 cmn.ToHeader(&op.ObjAttrs, hdr, op.ObjAttrs.Size) 1114 if op.ObjAttrs.Cksum == nil { 1115 // cos.Cksum does not have default nil/zero value (reflection) 1116 op.ObjAttrs.Cksum = cos.NewCksum("", "") 1117 } 1118 errIter := cmn.IterFields(op, func(tag string, field cmn.IterField) (err error, b bool) { 1119 if !hasEC && strings.HasPrefix(tag, "ec.") { 1120 return nil, false 1121 } 1122 // NOTE: op.ObjAttrs were already added via cmn.ToHeader 1123 if tag[0] == '.' { 1124 return nil, false 1125 } 1126 v := field.String() 1127 if v == "" { 1128 return nil, false 1129 } 1130 name := apc.PropToHeader(tag) 1131 debug.Func(func() { 1132 vv := hdr.Get(name) 1133 debug.Assertf(vv == "", "not expecting duplications: %s=(%q, %q)", name, v, vv) 1134 }) 1135 hdr.Set(name, v) 1136 return nil, false 1137 }) 1138 debug.AssertNoErr(errIter) 1139 return 1140 } 1141 1142 // PATCH /v1/objects/<bucket-name>/<object-name> 1143 // By default, adds or updates existing custom keys. Will remove all existing keys and 1144 // replace them with the specified ones _iff_ `apc.QparamNewCustom` is set. 1145 func (t *target) httpobjpatch(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 1146 if err := t.parseReq(w, r, apireq); err != nil { 1147 return 1148 } 1149 if cmn.Rom.Features().IsSet(feat.EnforceIntraClusterAccess) { 1150 if isRedirect(apireq.query) == "" && t.isIntraCall(r.Header, false) != nil { 1151 t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected (remaddr=%s)", 1152 t.si, r.Method, r.RemoteAddr) 1153 return 1154 } 1155 } 1156 msg, err := t.readActionMsg(w, r) 1157 if err != nil { 1158 return 1159 } 1160 custom := cos.StrKVs{} 1161 if err := cos.MorphMarshal(msg.Value, &custom); err != nil { 1162 t.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, t.si, "set-custom", msg.Value, err) 1163 return 1164 } 1165 lom := core.AllocLOM(apireq.items[1] /*objName*/) 1166 defer core.FreeLOM(lom) 1167 if !t.isValidObjname(w, r, lom.ObjName) { 1168 return 1169 } 1170 if err := lom.InitBck(apireq.bck.Bucket()); err != nil { 1171 t.writeErr(w, r, err) 1172 return 1173 } 1174 if err := lom.Load(true /*cache it*/, false /*locked*/); err != nil { 1175 if cos.IsNotExist(err, 0) { 1176 t.writeErr(w, r, err, http.StatusNotFound) 1177 } else { 1178 t.writeErr(w, r, err) 1179 } 1180 return 1181 } 1182 delOldSetNew := cos.IsParseBool(apireq.query.Get(apc.QparamNewCustom)) 1183 if delOldSetNew { 1184 lom.SetCustomMD(custom) 1185 } else { 1186 for key, val := range custom { 1187 lom.SetCustomKey(key, val) 1188 } 1189 } 1190 lom.Persist() 1191 } 1192 1193 // 1194 // httpec* handlers 1195 // 1196 1197 // Returns a slice. Does not use GFN. 1198 func (t *target) httpecget(w http.ResponseWriter, r *http.Request) { 1199 apireq := apiReqAlloc(3, apc.URLPathEC.L, false) 1200 apireq.bckIdx = 1 1201 if err := t.parseReq(w, r, apireq); err != nil { 1202 apiReqFree(apireq) 1203 return 1204 } 1205 switch apireq.items[0] { 1206 case ec.URLMeta: 1207 t.sendECMetafile(w, r, apireq.bck, apireq.items[2]) 1208 case ec.URLCT: 1209 lom := core.AllocLOM(apireq.items[2]) 1210 t.sendECCT(w, r, apireq.bck, lom) 1211 core.FreeLOM(lom) 1212 default: 1213 t.writeErrURL(w, r) 1214 } 1215 apiReqFree(apireq) 1216 } 1217 1218 // Returns a CT's metadata. 1219 func (t *target) sendECMetafile(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string) { 1220 if err := bck.Init(t.owner.bmd); err != nil { 1221 if !cmn.IsErrRemoteBckNotFound(err) { // is ais 1222 t.writeErr(w, r, err, Silent) 1223 return 1224 } 1225 } 1226 md, err := ec.ObjectMetadata(bck, objName) 1227 if err != nil { 1228 if os.IsNotExist(err) { 1229 t.writeErr(w, r, err, http.StatusNotFound, Silent) 1230 } else { 1231 t.writeErr(w, r, err, http.StatusInternalServerError, Silent) 1232 } 1233 return 1234 } 1235 b := md.NewPack() 1236 w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(b))) 1237 w.Write(b) 1238 } 1239 1240 func (t *target) sendECCT(w http.ResponseWriter, r *http.Request, bck *meta.Bck, lom *core.LOM) { 1241 if err := lom.InitBck(bck.Bucket()); err != nil { 1242 if cmn.IsErrRemoteBckNotFound(err) { 1243 t.BMDVersionFixup(r) 1244 err = lom.InitBck(bck.Bucket()) 1245 } 1246 if err != nil { 1247 t.writeErr(w, r, err) 1248 return 1249 } 1250 } 1251 sliceFQN := lom.Mountpath().MakePathFQN(bck.Bucket(), fs.ECSliceType, lom.ObjName) 1252 finfo, err := os.Stat(sliceFQN) 1253 if err != nil { 1254 t.writeErr(w, r, err, http.StatusNotFound, Silent) 1255 return 1256 } 1257 file, err := os.Open(sliceFQN) 1258 if err != nil { 1259 t.fsErr(err, sliceFQN) 1260 t.writeErr(w, r, err, http.StatusInternalServerError) 1261 return 1262 } 1263 1264 w.Header().Set(cos.HdrContentLength, strconv.FormatInt(finfo.Size(), 10)) 1265 _, err = io.Copy(w, file) // No need for `io.CopyBuffer` as `sendfile` syscall will be used. 1266 cos.Close(file) 1267 if err != nil { 1268 nlog.Errorf("Failed to send slice %s: %v", lom.Cname(), err) 1269 } 1270 } 1271 1272 // called under lock 1273 func (t *target) putApndArch(r *http.Request, lom *core.LOM, started int64, dpq *dpq) (int, error) { 1274 var ( 1275 mime = dpq.arch.mime // apc.QparamArchmime 1276 filename = dpq.arch.path // apc.QparamArchpath 1277 flags int64 1278 ) 1279 if strings.HasPrefix(filename, lom.ObjName) { 1280 if rel, err := filepath.Rel(lom.ObjName, filename); err == nil { 1281 filename = rel 1282 } 1283 } 1284 if s := r.Header.Get(apc.HdrPutApndArchFlags); s != "" { 1285 var errV error 1286 if flags, errV = strconv.ParseInt(s, 10, 64); errV != nil { 1287 return http.StatusBadRequest, 1288 fmt.Errorf("failed to archive %s: invalid flags %q in the request", lom.Cname(), s) 1289 } 1290 } 1291 a := &putA2I{ 1292 started: started, 1293 t: t, 1294 lom: lom, 1295 r: r.Body, 1296 filename: filename, 1297 mime: mime, 1298 put: false, // below 1299 } 1300 if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil { 1301 if !os.IsNotExist(err) { 1302 return http.StatusInternalServerError, err 1303 } 1304 if flags == apc.ArchAppend { 1305 return http.StatusNotFound, err 1306 } 1307 a.put = true 1308 } else { 1309 a.put = (flags == 0) 1310 } 1311 if s := r.Header.Get(cos.HdrContentLength); s != "" { 1312 if size, err := strconv.ParseInt(s, 10, 64); err == nil { 1313 a.size = size 1314 } 1315 } 1316 if a.size == 0 { 1317 return http.StatusBadRequest, fmt.Errorf("failed to archive %s: missing %q in the request", 1318 lom.Cname(), cos.HdrContentLength) 1319 } 1320 return a.do() 1321 } 1322 1323 func (t *target) DeleteObject(lom *core.LOM, evict bool) (code int, err error) { 1324 var isback bool 1325 lom.Lock(true) 1326 code, err, isback = t.delobj(lom, evict) 1327 lom.Unlock(true) 1328 1329 // special corner-case retry (quote): 1330 // - googleapi: "Error 503: We encountered an internal error. Please try again." 1331 // - aws-error[InternalError: We encountered an internal error. Please try again.] 1332 if err != nil && isback { 1333 if code == http.StatusServiceUnavailable || strings.Contains(err.Error(), "try again") { 1334 nlog.Errorf("failed to delete %s: %v(%d) - retrying...", lom, err, code) 1335 time.Sleep(time.Second) 1336 code, err = t.Backend(lom.Bck()).DeleteObj(lom) 1337 } 1338 } 1339 if err == nil { 1340 t.statsT.Inc(stats.DeleteCount) 1341 } else { 1342 t.statsT.IncErr(stats.DeleteCount) // TODO: count GET/PUT/DELETE remote errors separately.. 1343 } 1344 return 1345 } 1346 1347 func (t *target) delobj(lom *core.LOM, evict bool) (int, error, bool) { 1348 var ( 1349 aisErr, backendErr error 1350 aisErrCode, backendErrCode int 1351 delFromAIS, delFromBackend bool 1352 ) 1353 delFromBackend = lom.Bck().IsRemote() && !evict 1354 err := lom.Load(false /*cache it*/, true /*locked*/) 1355 if err != nil { 1356 if !cos.IsNotExist(err, 0) { 1357 return 0, err, false 1358 } 1359 if !delFromBackend { 1360 return http.StatusNotFound, err, false 1361 } 1362 } else { 1363 delFromAIS = true 1364 } 1365 1366 // do 1367 if delFromBackend { 1368 backendErrCode, backendErr = t.Backend(lom.Bck()).DeleteObj(lom) 1369 } 1370 if delFromAIS { 1371 size := lom.SizeBytes() 1372 aisErr = lom.Remove() 1373 if aisErr != nil { 1374 if !os.IsNotExist(aisErr) { 1375 if backendErr != nil { 1376 // unlikely 1377 nlog.Errorf("double-failure to delete %s: ais err %v, backend err %v(%d)", 1378 lom, aisErr, backendErr, backendErrCode) 1379 } 1380 return 0, aisErr, false 1381 } 1382 } else if evict { 1383 debug.Assert(lom.Bck().IsRemote()) 1384 t.statsT.AddMany( 1385 cos.NamedVal64{Name: stats.LruEvictCount, Value: 1}, 1386 cos.NamedVal64{Name: stats.LruEvictSize, Value: size}, 1387 ) 1388 } 1389 } 1390 if backendErr != nil { 1391 return backendErrCode, backendErr, true 1392 } 1393 return aisErrCode, aisErr, false 1394 } 1395 1396 // rename obj 1397 func (t *target) objMv(lom *core.LOM, msg *apc.ActMsg) (err error) { 1398 if lom.Bck().IsRemote() { 1399 return fmt.Errorf("%s: cannot rename object %s from remote bucket", t.si, lom) 1400 } 1401 if lom.ECEnabled() { 1402 return fmt.Errorf("%s: cannot rename erasure-coded object %s", t.si, lom) 1403 } 1404 if msg.Name == lom.ObjName { 1405 return fmt.Errorf("%s: cannot rename/move object %s onto itself", t.si, lom) 1406 } 1407 1408 buf, slab := t.gmm.Alloc() 1409 coiParams := core.AllocCOI() 1410 { 1411 coiParams.BckTo = lom.Bck() 1412 coiParams.ObjnameTo = msg.Name /* new object name */ 1413 coiParams.Buf = buf 1414 coiParams.Config = cmn.GCO.Get() 1415 coiParams.OWT = cmn.OwtCopy 1416 coiParams.Finalize = true 1417 } 1418 coi := (*copyOI)(coiParams) 1419 _, err = coi.do(t, nil /*DM*/, lom) 1420 core.FreeCOI(coiParams) 1421 slab.Free(buf) 1422 if err != nil { 1423 return err 1424 } 1425 1426 // TODO: combine copy+delete under a single write lock 1427 lom.Lock(true) 1428 if err := lom.Remove(); err != nil { 1429 nlog.Warningf("%s: failed to delete renamed object %s (new name %s): %v", t, lom, msg.Name, err) 1430 } 1431 lom.Unlock(true) 1432 return nil 1433 } 1434 1435 // compare running the same via (generic) t.xstart 1436 func (t *target) blobdl(params *core.BlobParams, oa *cmn.ObjAttrs) (string, *xs.XactBlobDl, error) { 1437 // cap 1438 cs := fs.Cap() 1439 if errCap := cs.Err(); errCap != nil { 1440 cs = t.OOS(nil) 1441 if err := cs.Err(); err != nil { 1442 return "", nil, err 1443 } 1444 } 1445 1446 if oa != nil { 1447 return _blobdl(params, oa) 1448 } 1449 1450 // - try-lock (above) to load, check availability 1451 // - unlock right away 1452 // - subsequently, use cmn.OwtGetPrefetchLock to finalize 1453 // - there's a single x-blob-download per object (see WhenPrevIsRunning) 1454 lom, latestVer := params.Lom, params.Msg.LatestVer 1455 if !lom.TryLock(false) { 1456 return "", nil, cmn.NewErrBusy("blob", lom.Cname()) 1457 } 1458 1459 oa, deleted, err := lom.LoadLatest(latestVer) 1460 lom.Unlock(false) 1461 1462 // w/ assorted returns 1463 switch { 1464 case deleted: // remotely 1465 debug.Assert(latestVer && err != nil) 1466 return "", nil, err 1467 case oa != nil: 1468 debug.Assert(latestVer && err == nil) 1469 // not latest 1470 case err == nil: 1471 return "", nil, nil // nothing to do 1472 case !cmn.IsErrObjNought(err): 1473 return "", nil, err 1474 } 1475 1476 // handle: (not-present || latest-not-eq) 1477 return _blobdl(params, oa) 1478 } 1479 1480 // returns an empty xid ("") if nothing to do 1481 func _blobdl(params *core.BlobParams, oa *cmn.ObjAttrs) (string, *xs.XactBlobDl, error) { 1482 if params.WriteSGL == nil { 1483 // regular lom save (custom writer not present) 1484 wfqn := fs.CSM.Gen(params.Lom, fs.WorkfileType, "blob-dl") 1485 lmfh, err := params.Lom.CreateFile(wfqn) 1486 if err != nil { 1487 return "", nil, err 1488 } 1489 params.Lmfh = lmfh 1490 params.Wfqn = wfqn 1491 } 1492 // new 1493 xid := cos.GenUUID() 1494 rns := xs.RenewBlobDl(xid, params, oa) 1495 if rns.Err != nil || rns.IsRunning() { // cmn.IsErrXactUsePrev(rns.Err): single blob-downloader per blob 1496 if params.Lmfh != nil { 1497 cos.Close(params.Lmfh) 1498 } 1499 if params.Wfqn != "" { 1500 if errRemove := cos.RemoveFile(params.Wfqn); errRemove != nil { 1501 nlog.Errorln("nested err", errRemove) 1502 } 1503 } 1504 return "", nil, rns.Err 1505 } 1506 1507 // a) via x-start, x-blob-download 1508 xblob := rns.Entry.Get().(*xs.XactBlobDl) 1509 if params.RspW == nil { 1510 go xblob.Run(nil) 1511 return xblob.ID(), xblob, nil 1512 } 1513 // b) via GET (blocking w/ simultaneous transmission) 1514 xblob.Run(nil) 1515 return "", nil, xblob.AbortErr() 1516 } 1517 1518 func (t *target) fsErr(err error, filepath string) { 1519 if !cmn.GCO.Get().FSHC.Enabled || !cos.IsIOError(err) { 1520 return 1521 } 1522 mi, _ := fs.Path2Mpath(filepath) 1523 if mi == nil { 1524 return 1525 } 1526 if cos.IsErrOOS(err) { 1527 cs := t.OOS(nil) 1528 nlog.Errorf("%s: fsErr %s", t, cs.String()) 1529 return 1530 } 1531 nlog.Errorf("%s: waking up FSHC to check %q for err %v", t, filepath, err) 1532 keyName := mi.Path 1533 // keyName is the mountpath is the fspath - counting IO errors on a per basis.. 1534 t.statsT.AddMany(cos.NamedVal64{Name: stats.ErrIOCount, NameSuffix: keyName, Value: 1}) 1535 t.fshc.OnErr(filepath) 1536 }