github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/tgttxn.go (about) 1 // Package ais provides core functionality for the AIStore object storage. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ais 6 7 import ( 8 "errors" 9 "fmt" 10 "net/http" 11 "net/url" 12 "os" 13 "strconv" 14 "strings" 15 "time" 16 17 "github.com/NVIDIA/aistore/api/apc" 18 "github.com/NVIDIA/aistore/cmn" 19 "github.com/NVIDIA/aistore/cmn/archive" 20 "github.com/NVIDIA/aistore/cmn/cos" 21 "github.com/NVIDIA/aistore/cmn/debug" 22 "github.com/NVIDIA/aistore/cmn/feat" 23 "github.com/NVIDIA/aistore/cmn/k8s" 24 "github.com/NVIDIA/aistore/cmn/nlog" 25 "github.com/NVIDIA/aistore/core" 26 "github.com/NVIDIA/aistore/core/meta" 27 "github.com/NVIDIA/aistore/ext/etl" 28 "github.com/NVIDIA/aistore/fs" 29 "github.com/NVIDIA/aistore/nl" 30 "github.com/NVIDIA/aistore/reb" 31 "github.com/NVIDIA/aistore/xact" 32 "github.com/NVIDIA/aistore/xact/xreg" 33 "github.com/NVIDIA/aistore/xact/xs" 34 jsoniter "github.com/json-iterator/go" 35 ) 36 37 const ActCleanup = "cleanup" // in addition to (apc.ActBegin, ...) 38 39 // context structure to gather all (or most) of the relevant state in one place 40 // (compare with txnCln) 41 type txnSrv struct { 42 t *target 43 msg *aisMsg 44 bck *meta.Bck // aka bckFrom 45 bckTo *meta.Bck 46 query url.Values 47 uuid string 48 phase string 49 callerName string 50 callerID string 51 timeout struct { 52 netw time.Duration 53 host time.Duration 54 } 55 } 56 57 // TODO: return xaction ID (xid) where applicable 58 59 // verb /v1/txn 60 func (t *target) txnHandler(w http.ResponseWriter, r *http.Request) { 61 var bucket, phase, xid string 62 if r.Method != http.MethodPost { 63 cmn.WriteErr405(w, r, http.MethodPost) 64 return 65 } 66 msg, err := t.readAisMsg(w, r) 67 if err != nil { 68 return 69 } 70 71 xactRecord := xact.Table[msg.Action] 72 onlyPrimary := xactRecord.Metasync 73 if !t.ensureIntraControl(w, r, onlyPrimary) { 74 return 75 } 76 77 apiItems, err := t.parseURL(w, r, apc.URLPathTxn.L, 0, true) 78 if err != nil { 79 return 80 } 81 switch len(apiItems) { 82 case 1: // Global transaction. 83 phase = apiItems[0] 84 case 2: // Bucket-based transaction. 85 bucket, phase = apiItems[0], apiItems[1] 86 default: 87 t.writeErrURL(w, r) 88 return 89 } 90 91 c := &txnSrv{t: t, msg: msg, phase: phase} 92 if err := c.init(r, bucket); err != nil { 93 t.writeErr(w, r, err) 94 return 95 } 96 97 switch msg.Action { 98 case apc.ActCreateBck, apc.ActAddRemoteBck: 99 err = t.createBucket(c) 100 case apc.ActMakeNCopies: 101 xid, err = t.makeNCopies(c) 102 case apc.ActSetBprops, apc.ActResetBprops: 103 xid, err = t.setBprops(c) 104 case apc.ActMoveBck: 105 xid, err = t.renameBucket(c) 106 case apc.ActCopyBck, apc.ActETLBck: 107 var ( 108 dp core.DP 109 tcbmsg = &apc.TCBMsg{} 110 ) 111 if err := cos.MorphMarshal(c.msg.Value, tcbmsg); err != nil { 112 t.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, t.si, msg.Action, c.msg.Value, err) 113 return 114 } 115 if msg.Action == apc.ActETLBck { 116 var err error 117 if dp, err = etlDP(tcbmsg); err != nil { 118 t.writeErr(w, r, err) 119 return 120 } 121 } 122 xid, err = t.tcb(c, tcbmsg, dp) 123 case apc.ActCopyObjects, apc.ActETLObjects: 124 var ( 125 dp core.DP 126 tcomsg = &cmn.TCObjsMsg{} 127 ) 128 if err := cos.MorphMarshal(c.msg.Value, tcomsg); err != nil { 129 t.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, t.si, msg.Action, c.msg.Value, err) 130 return 131 } 132 if msg.Action == apc.ActETLObjects { 133 cs := fs.Cap() 134 if err := cs.Err(); err != nil { 135 t.writeErr(w, r, err, http.StatusInsufficientStorage) 136 return 137 } 138 var err error 139 if dp, err = etlDP(&tcomsg.TCBMsg); err != nil { 140 t.writeErr(w, r, err) 141 return 142 } 143 } 144 xid, err = t.tcobjs(c, tcomsg, dp) 145 case apc.ActECEncode: 146 xid, err = t.ecEncode(c) 147 case apc.ActArchive: 148 xid, err = t.createArchMultiObj(c) 149 case apc.ActStartMaintenance, apc.ActDecommissionNode, apc.ActShutdownNode: 150 err = t.beginRm(c) 151 case apc.ActDestroyBck, apc.ActEvictRemoteBck: 152 err = t.destroyBucket(c) 153 case apc.ActPromote: 154 hdr := w.Header() 155 xid, err = t.promote(c, hdr) 156 default: 157 t.writeErrAct(w, r, msg.Action) 158 } 159 if err == nil { 160 if xid != "" { 161 w.Header().Set(apc.HdrXactionID, xid) 162 } 163 return 164 } 165 166 // cleanup on error 167 t.transactions.find(c.uuid, ActCleanup) 168 169 if cmn.IsErrCapExceeded(err) { 170 cs := t.OOS(nil) 171 t.writeErrStatusf(w, r, http.StatusInsufficientStorage, "%s: %v", cs.String(), err) 172 } else { 173 t.writeErr(w, r, err) 174 } 175 } 176 177 // 178 // createBucket 179 // 180 181 func (t *target) createBucket(c *txnSrv) error { 182 switch c.phase { 183 case apc.ActBegin: 184 txn := newTxnCreateBucket(c) 185 if err := t.transactions.begin(txn); err != nil { 186 return err 187 } 188 if c.msg.Action == apc.ActCreateBck && c.bck.IsRemote() { 189 if c.msg.Value != nil { 190 if err := cos.MorphMarshal(c.msg.Value, &c.bck.Props); err != nil { 191 return fmt.Errorf(cmn.FmtErrMorphUnmarshal, t, c.msg.Action, c.msg.Value, err) 192 } 193 } 194 if _, err := t.Backend(c.bck).CreateBucket(c.bck); err != nil { 195 return err 196 } 197 } 198 case apc.ActAbort: 199 t.transactions.find(c.uuid, apc.ActAbort) 200 case apc.ActCommit: 201 t._commitCreateDestroy(c) 202 default: 203 debug.Assert(false) 204 } 205 return nil 206 } 207 208 func (t *target) _commitCreateDestroy(c *txnSrv) (err error) { 209 txn, err := t.transactions.find(c.uuid, "") 210 if err != nil { 211 return err 212 } 213 // wait for newBMD w/timeout 214 if err = t.transactions.wait(txn, c.timeout.netw, c.timeout.host); err != nil { 215 return cmn.NewErrFailedTo(t, "commit", txn, err) 216 } 217 return 218 } 219 220 // 221 // makeNCopies 222 // 223 224 func (t *target) makeNCopies(c *txnSrv) (string, error) { 225 switch c.phase { 226 case apc.ActBegin: 227 if err := c.bck.Init(t.owner.bmd); err != nil { 228 return "", err 229 } 230 curCopies, newCopies, err := t.validateMakeNCopies(c.bck, c.msg) 231 if err != nil { 232 return "", err 233 } 234 cs := fs.Cap() 235 if err := cs.Err(); err != nil { 236 return "", err 237 } 238 nlp := newBckNLP(c.bck) 239 if !nlp.TryLock(c.timeout.netw / 2) { 240 return "", cmn.NewErrBusy("bucket", c.bck.Cname("")) 241 } 242 txn := newTxnMakeNCopies(c, curCopies, newCopies) 243 if err := t.transactions.begin(txn, nlp); err != nil { 244 return "", err 245 } 246 case apc.ActAbort: 247 t.transactions.find(c.uuid, apc.ActAbort) 248 case apc.ActCommit: 249 if err := c.bck.Init(t.owner.bmd); err != nil { 250 return "", err 251 } 252 copies, err := _parseNCopies(c.msg.Value) 253 debug.AssertNoErr(err) 254 txn, err := t.transactions.find(c.uuid, "") 255 if err != nil { 256 return "", err 257 } 258 txnMnc := txn.(*txnMakeNCopies) 259 debug.Assert(txnMnc.newCopies == copies) 260 261 // wait for newBMD w/timeout 262 if err = t.transactions.wait(txn, c.timeout.netw, c.timeout.host); err != nil { 263 return "", cmn.NewErrFailedTo(t, "commit", txn, err) 264 } 265 266 // do the work in xaction 267 rns := xreg.RenewBckMakeNCopies(c.bck, c.uuid, "mnc-actmnc", int(copies)) 268 if rns.Err != nil { 269 return "", fmt.Errorf("%s %s: %v", t, txn, rns.Err) 270 } 271 xctn := rns.Entry.Get() 272 flt := xreg.Flt{Kind: apc.ActPutCopies, Bck: c.bck} 273 xreg.DoAbort(flt, errors.New("make-n-copies")) 274 c.addNotif(xctn) // notify upon completion 275 xact.GoRunW(xctn) 276 277 return xctn.ID(), nil 278 default: 279 debug.Assert(false) 280 } 281 return "", nil 282 } 283 284 func (t *target) validateMakeNCopies(bck *meta.Bck, msg *aisMsg) (curCopies, newCopies int64, err error) { 285 curCopies = bck.Props.Mirror.Copies 286 newCopies, err = _parseNCopies(msg.Value) 287 if err == nil { 288 err = fs.ValidateNCopies(t.si.Name(), int(newCopies)) 289 } 290 // (consider adding "force" option similar to CopyBckMsg.Force) 291 if err == nil { 292 err = xreg.LimitedCoexistence(t.si, bck, msg.Action) 293 } 294 if err != nil { 295 return 296 } 297 // don't allow increasing num-copies when used cap is above high wm (let alone OOS) 298 if bck.Props.Mirror.Copies < newCopies { 299 cs := fs.Cap() 300 err = cs.Err() 301 } 302 return 303 } 304 305 // 306 // setBprops 307 // 308 309 func (t *target) setBprops(c *txnSrv) (string, error) { 310 switch c.phase { 311 case apc.ActBegin: 312 if err := c.bck.Init(t.owner.bmd); err != nil { 313 return "", err 314 } 315 var ( 316 nprops *cmn.Bprops 317 err error 318 ) 319 if nprops, err = t.validateNprops(c.bck, c.msg); err != nil { 320 return "", err 321 } 322 nlp := newBckNLP(c.bck) 323 if !nlp.TryLock(c.timeout.netw / 2) { 324 return "", cmn.NewErrBusy("bucket", c.bck.Cname("")) 325 } 326 txn := newTxnSetBucketProps(c, nprops) 327 if err := t.transactions.begin(txn, nlp); err != nil { 328 return "", err 329 } 330 case apc.ActAbort: 331 t.transactions.find(c.uuid, apc.ActAbort) 332 case apc.ActCommit: 333 if err := c.bck.Init(t.owner.bmd); err != nil { 334 return "", err 335 } 336 var xid string 337 txn, err := t.transactions.find(c.uuid, "") 338 if err != nil { 339 return "", err 340 } 341 txnSetBprops := txn.(*txnSetBucketProps) 342 bprops, nprops := txnSetBprops.bprops, txnSetBprops.nprops 343 // wait for newBMD w/timeout 344 if err = t.transactions.wait(txn, c.timeout.netw, c.timeout.host); err != nil { 345 return "", cmn.NewErrFailedTo(t, "commit", txn, err) 346 } 347 if _reMirror(bprops, nprops) { 348 n := int(nprops.Mirror.Copies) 349 rns := xreg.RenewBckMakeNCopies(c.bck, c.uuid, "mnc-setprops", n) 350 if rns.Err != nil { 351 return "", fmt.Errorf("%s %s: %v", t, txn, rns.Err) 352 } 353 xctn := rns.Entry.Get() 354 flt := xreg.Flt{Kind: apc.ActPutCopies, Bck: c.bck} 355 xreg.DoAbort(flt, errors.New("re-mirror")) 356 c.addNotif(xctn) // notify upon completion 357 xact.GoRunW(xctn) 358 xid = xctn.ID() 359 } 360 if _, reec := _reEC(bprops, nprops, c.bck, nil /*smap*/); reec { 361 flt := xreg.Flt{Kind: apc.ActECEncode, Bck: c.bck} 362 xreg.DoAbort(flt, errors.New("re-ec")) 363 rns := xreg.RenewECEncode(c.bck, c.uuid, apc.ActCommit) 364 if rns.Err != nil { 365 return "", rns.Err 366 } 367 xctn := rns.Entry.Get() 368 c.addNotif(xctn) // ditto 369 xact.GoRunW(xctn) 370 371 if xid == "" { 372 xid = xctn.ID() 373 } else { 374 xid = "" // not supporting multiple.. 375 } 376 } 377 return xid, nil 378 default: 379 debug.Assert(false) 380 } 381 return "", nil 382 } 383 384 func (t *target) validateNprops(bck *meta.Bck, msg *aisMsg) (nprops *cmn.Bprops, err error) { 385 var ( 386 body = cos.MustMarshal(msg.Value) 387 cs = fs.Cap() 388 ) 389 nprops = &cmn.Bprops{} 390 if err = jsoniter.Unmarshal(body, nprops); err != nil { 391 err = fmt.Errorf(cmn.FmtErrUnmarshal, t, "new bucket props", cos.BHead(body), err) 392 return 393 } 394 err = cs.Err() 395 if nprops.Mirror.Enabled { 396 mpathCount := fs.NumAvail() 397 if int(nprops.Mirror.Copies) > mpathCount { 398 err = fmt.Errorf(fmtErrInsuffMpaths1, t, mpathCount, bck, nprops.Mirror.Copies) 399 return 400 } 401 if nprops.Mirror.Copies < bck.Props.Mirror.Copies { 402 err = nil 403 } 404 } 405 if !nprops.EC.Enabled && bck.Props.EC.Enabled { 406 err = nil 407 } 408 return 409 } 410 411 // 412 // renameBucket 413 // 414 415 func (t *target) renameBucket(c *txnSrv) (string, error) { 416 switch c.phase { 417 case apc.ActBegin: 418 if err := c.bck.Init(t.owner.bmd); err != nil { 419 return "", err 420 } 421 bckFrom, bckTo := c.bck, c.bckTo 422 if err := t.validateBckRenTxn(bckFrom, bckTo, c.msg); err != nil { 423 return "", err 424 } 425 nlpFrom := newBckNLP(bckFrom) 426 nlpTo := newBckNLP(bckTo) 427 if !nlpFrom.TryLock(c.timeout.netw / 4) { 428 return "", cmn.NewErrBusy("bucket", bckFrom.Cname("")) 429 } 430 if !nlpTo.TryLock(c.timeout.netw / 4) { 431 nlpFrom.Unlock() 432 return "", cmn.NewErrBusy("bucket", bckTo.Cname("")) 433 } 434 txn := newTxnRenameBucket(c, bckFrom, bckTo) 435 if err := t.transactions.begin(txn, nlpFrom, nlpTo); err != nil { 436 return "", err 437 } 438 case apc.ActAbort: 439 t.transactions.find(c.uuid, apc.ActAbort) 440 case apc.ActCommit: 441 if err := c.bck.Init(t.owner.bmd); err != nil { 442 return "", err 443 } 444 txn, err := t.transactions.find(c.uuid, "") 445 if err != nil { 446 return "", err 447 } 448 txnRenB := txn.(*txnRenameBucket) 449 // wait for newBMD w/timeout 450 if err = t.transactions.wait(txn, c.timeout.netw, c.timeout.host); err != nil { 451 return "", cmn.NewErrFailedTo(t, "commit", txn, err) 452 } 453 rns := xreg.RenewBckRename(txnRenB.bckFrom, txnRenB.bckTo, c.uuid, c.msg.RMDVersion, apc.ActCommit) 454 if rns.Err != nil { 455 nlog.Errorf("%s: %s %v", t, txn, rns.Err) 456 return "", rns.Err // must not happen at commit time 457 } 458 xctn := rns.Entry.Get() 459 err = fs.RenameBucketDirs(txnRenB.bckFrom.Bucket(), txnRenB.bckTo.Bucket()) 460 if err != nil { 461 return "", err // ditto 462 } 463 c.addNotif(xctn) // notify upon completion 464 465 reb.OnTimedGFN() 466 xact.GoRunW(xctn) // run and wait until it starts running 467 468 return xctn.ID(), nil 469 default: 470 debug.Assert(false) 471 } 472 return "", nil 473 } 474 475 func (t *target) validateBckRenTxn(bckFrom, bckTo *meta.Bck, msg *aisMsg) error { 476 cs := fs.Cap() 477 if err := cs.Err(); err != nil { 478 return err 479 } 480 if err := xreg.LimitedCoexistence(t.si, bckFrom, msg.Action, bckTo); err != nil { 481 return err 482 } 483 bmd := t.owner.bmd.get() 484 if _, present := bmd.Get(bckFrom); !present { 485 return cmn.NewErrBckNotFound(bckFrom.Bucket()) 486 } 487 if _, present := bmd.Get(bckTo); present { 488 return cmn.NewErrBckAlreadyExists(bckTo.Bucket()) 489 } 490 availablePaths := fs.GetAvail() 491 for _, mi := range availablePaths { 492 path := mi.MakePathCT(bckTo.Bucket(), fs.ObjectType) 493 if err := cos.Stat(path); err != nil { 494 if !os.IsNotExist(err) { 495 return err 496 } 497 continue 498 } 499 if names, empty, err := fs.IsDirEmpty(path); err != nil { 500 return err 501 } else if !empty { 502 return fmt.Errorf("directory %q already exists and is not empty (%v...)", path, names) 503 } 504 } 505 return nil 506 } 507 508 func etlDP(msg *apc.TCBMsg) (core.DP, error) { 509 if !k8s.IsK8s() { 510 return nil, k8s.ErrK8sRequired 511 } 512 if err := msg.Validate(true); err != nil { 513 return nil, err 514 } 515 return etl.NewOfflineDP(msg, cmn.GCO.Get()) 516 } 517 518 // common for both bucket copy and bucket transform - does the heavy lifting 519 func (t *target) tcb(c *txnSrv, msg *apc.TCBMsg, dp core.DP) (string, error) { 520 switch c.phase { 521 case apc.ActBegin: 522 if err := c.bck.Init(t.owner.bmd); err != nil { 523 return "", err 524 } 525 bckTo, bckFrom := c.bckTo, c.bck 526 if err := bckTo.Validate(); err != nil { 527 return "", err 528 } 529 if err := bckFrom.Validate(); err != nil { 530 return "", err 531 } 532 cs := fs.Cap() 533 if err := cs.Err(); err != nil { 534 return "", err 535 } 536 if err := xreg.LimitedCoexistence(t.si, bckFrom, c.msg.Action); err != nil { 537 if !msg.Force { 538 return "", err 539 } 540 nlog.Errorf("%s: %v - %q is \"forced\", proceeding anyway", t, err, c.msg.Action) 541 } 542 bmd := t.owner.bmd.get() 543 if _, present := bmd.Get(bckFrom); !present { 544 return "", cmn.NewErrBckNotFound(bckFrom.Bucket()) 545 } 546 if err := t._tcbBegin(c, msg, dp); err != nil { 547 return "", err 548 } 549 case apc.ActAbort: 550 t.transactions.find(c.uuid, apc.ActAbort) 551 case apc.ActCommit: 552 if err := c.bck.Init(t.owner.bmd); err != nil { 553 return "", err 554 } 555 txn, err := t.transactions.find(c.uuid, "") 556 if err != nil { 557 return "", err 558 } 559 txnTcb := txn.(*txnTCB) 560 561 if c.query.Get(apc.QparamWaitMetasync) != "" { 562 if err = t.transactions.wait(txn, c.timeout.netw, c.timeout.host); err != nil { 563 txnTcb.xtcb.TxnAbort(err) 564 return "", cmn.NewErrFailedTo(t, "commit", txn, err) 565 } 566 } else { 567 t.transactions.find(c.uuid, apc.ActCommit) 568 } 569 570 custom := txnTcb.xtcb.Args() 571 if custom.Phase != apc.ActBegin { 572 err = fmt.Errorf("%s: %s is already running", t, txnTcb) // never here 573 nlog.Errorln(err) 574 return "", err 575 } 576 custom.Phase = apc.ActCommit 577 rns := xreg.RenewTCB(c.uuid, c.msg.Action /*kind*/, txnTcb.xtcb.Args()) 578 if rns.Err != nil { 579 if !cmn.IsErrXactUsePrev(rns.Err) { 580 txnTcb.xtcb.TxnAbort(rns.Err) 581 nlog.Errorf("%s: %s %v", t, txn, rns.Err) 582 } 583 return "", rns.Err 584 } 585 xctn := rns.Entry.Get() 586 xid := xctn.ID() 587 debug.Assert(xid == txnTcb.xtcb.ID()) 588 c.addNotif(xctn) // notify upon completion 589 xact.GoRunW(xctn) 590 return xid, nil 591 default: 592 debug.Assert(false) 593 } 594 return "", nil 595 } 596 597 func (t *target) _tcbBegin(c *txnSrv, msg *apc.TCBMsg, dp core.DP) (err error) { 598 var ( 599 bckTo, bckFrom = c.bckTo, c.bck 600 nlpFrom = newBckNLP(bckFrom) 601 nlpTo core.NLP 602 ) 603 if !nlpFrom.TryRLock(c.timeout.netw / 4) { 604 return cmn.NewErrBusy("bucket", bckFrom.Cname("")) 605 } 606 if !msg.DryRun && !bckFrom.Equal(bckTo, true, true) { 607 nlpTo = newBckNLP(bckTo) 608 if !nlpTo.TryLock(c.timeout.netw / 4) { 609 nlpFrom.Unlock() 610 return cmn.NewErrBusy("bucket", bckTo.Cname("")) 611 } 612 } 613 custom := &xreg.TCBArgs{Phase: apc.ActBegin, BckFrom: bckFrom, BckTo: bckTo, DP: dp, Msg: msg} 614 rns := xreg.RenewTCB(c.uuid, c.msg.Action /*kind*/, custom) 615 if err = rns.Err; err != nil { 616 nlog.Errorf("%s: %q %+v %v", t, c.uuid, msg, rns.Err) 617 return 618 } 619 620 var ( 621 xctn = rns.Entry.Get() 622 xtcb = xctn.(*xs.XactTCB) 623 txn = newTxnTCB(c, xtcb) 624 nlps = []core.NLP{nlpFrom} 625 ) 626 if nlpTo != nil { 627 nlps = append(nlps, nlpTo) 628 } 629 return t.transactions.begin(txn, nlps...) 630 } 631 632 // Two IDs: 633 // - TxnUUID: transaction (txn) ID 634 // - xid: xaction ID (will have "tco-" prefix) 635 func (t *target) tcobjs(c *txnSrv, msg *cmn.TCObjsMsg, dp core.DP) (xid string, _ error) { 636 switch c.phase { 637 case apc.ActBegin: 638 var ( 639 bckTo = c.bckTo 640 bckFrom = c.bck // from 641 ) 642 if err := c.bck.Init(t.owner.bmd); err != nil { 643 return xid, err 644 } 645 // validate 646 if err := bckTo.Validate(); err != nil { 647 return xid, err 648 } 649 if err := bckFrom.Validate(); err != nil { 650 return xid, err 651 } 652 cs := fs.Cap() 653 if err := cs.Err(); err != nil { 654 return xid, err 655 } 656 if err := xreg.LimitedCoexistence(t.si, bckFrom, c.msg.Action); err != nil { 657 return xid, err 658 } 659 bmd := t.owner.bmd.get() 660 if _, present := bmd.Get(bckFrom); !present { 661 return xid, cmn.NewErrBckNotFound(bckFrom.Bucket()) 662 } 663 // begin 664 custom := &xreg.TCObjsArgs{BckFrom: bckFrom, BckTo: bckTo, DP: dp} 665 rns := xreg.RenewTCObjs(c.msg.Action /*kind*/, custom) 666 if rns.Err != nil { 667 nlog.Errorf("%s: %q %+v %v", t, c.uuid, c.msg, rns.Err) 668 return xid, rns.Err 669 } 670 xctn := rns.Entry.Get() 671 xid = xctn.ID() 672 673 xtco := xctn.(*xs.XactTCObjs) 674 675 debug.Assert(msg.TxnUUID == "" || msg.TxnUUID == c.uuid) // (ref050724) 676 msg.TxnUUID = c.uuid 677 txn := newTxnTCObjs(c, bckFrom, xtco, msg) 678 if err := t.transactions.begin(txn); err != nil { 679 return xid, err 680 } 681 xtco.Begin(msg) 682 case apc.ActAbort: 683 txn, err := t.transactions.find(c.uuid, apc.ActAbort) 684 if err == nil { 685 txnTco := txn.(*txnTCObjs) 686 // if _this_ transaction initiated _that_ on-demand 687 if xtco := txnTco.xtco; xtco != nil && xtco.ID() == c.uuid { 688 xid = xtco.ID() 689 xtco.Abort(nil) 690 } 691 } 692 case apc.ActCommit: 693 if err := c.bck.Init(t.owner.bmd); err != nil { 694 return xid, err 695 } 696 txn, err := t.transactions.find(c.uuid, "") 697 if err != nil { 698 return xid, err 699 } 700 txnTco := txn.(*txnTCObjs) 701 var done bool 702 if c.query.Get(apc.QparamWaitMetasync) != "" { 703 if err = t.transactions.wait(txn, c.timeout.netw, c.timeout.host); err != nil { 704 txnTco.xtco.TxnAbort(err) 705 return "", cmn.NewErrFailedTo(t, "commit", txn, err) 706 } 707 done = true 708 } 709 710 txnTco.xtco.Do(txnTco.msg) 711 xid = txnTco.xtco.ID() 712 if !done { 713 t.transactions.find(c.uuid, apc.ActCommit) 714 } 715 default: 716 debug.Assert(false) 717 } 718 return xid, nil 719 } 720 721 // 722 // ecEncode 723 // 724 725 func (t *target) ecEncode(c *txnSrv) (string, error) { 726 switch c.phase { 727 case apc.ActBegin: 728 if err := c.bck.Init(t.owner.bmd); err != nil { 729 return "", err 730 } 731 if err := t.validateECEncode(c.bck, c.msg); err != nil { 732 return "", err 733 } 734 cs := fs.Cap() 735 if err := cs.Err(); err != nil { 736 return "", err 737 } 738 nlp := newBckNLP(c.bck) 739 740 if !nlp.TryLock(c.timeout.netw / 4) { 741 return "", cmn.NewErrBusy("bucket", c.bck.Cname("")) 742 } 743 txn := newTxnECEncode(c, c.bck) 744 if err := t.transactions.begin(txn, nlp); err != nil { 745 return "", err 746 } 747 case apc.ActAbort: 748 t.transactions.find(c.uuid, apc.ActAbort) 749 case apc.ActCommit: 750 if err := c.bck.Init(t.owner.bmd); err != nil { 751 return "", err 752 } 753 txn, err := t.transactions.find(c.uuid, "") 754 if err != nil { 755 return "", err 756 } 757 // wait for newBMD w/timeout 758 if err = t.transactions.wait(txn, c.timeout.netw, c.timeout.host); err != nil { 759 return "", cmn.NewErrFailedTo(t, "commit", txn, err) 760 } 761 rns := xreg.RenewECEncode(c.bck, c.uuid, apc.ActCommit) 762 if rns.Err != nil { 763 nlog.Errorf("%s: %s %v", t, txn, rns.Err) 764 return "", rns.Err 765 } 766 xctn := rns.Entry.Get() 767 c.addNotif(xctn) // notify upon completion 768 xact.GoRunW(xctn) 769 770 return xctn.ID(), rns.Err 771 default: 772 debug.Assert(false) 773 } 774 return "", nil 775 } 776 777 func (t *target) validateECEncode(bck *meta.Bck, msg *aisMsg) error { 778 cs := fs.Cap() 779 if err := cs.Err(); err != nil { 780 return err 781 } 782 return xreg.LimitedCoexistence(t.si, bck, msg.Action) 783 } 784 785 // 786 // createArchMultiObj 787 // 788 789 func (t *target) createArchMultiObj(c *txnSrv) (string /*xaction uuid*/, error) { 790 var xid string 791 switch c.phase { 792 case apc.ActBegin: 793 var ( 794 bckTo = c.bckTo 795 bckFrom = c.bck 796 ) 797 if err := c.bck.Init(t.owner.bmd); err != nil { 798 return xid, err 799 } 800 if err := bckTo.Validate(); err != nil { 801 return xid, err 802 } 803 if !bckFrom.Equal(bckTo, false, false) { 804 if err := bckFrom.Validate(); err != nil { 805 return xid, err 806 } 807 } 808 archMsg := &cmn.ArchiveBckMsg{} 809 if err := cos.MorphMarshal(c.msg.Value, archMsg); err != nil { 810 return xid, fmt.Errorf(cmn.FmtErrMorphUnmarshal, t, c.msg.Action, c.msg.Value, err) 811 } 812 mime, err := archive.Mime(archMsg.Mime, archMsg.ArchName) 813 if err != nil { 814 return xid, err 815 } 816 archMsg.Mime = mime // set it for xarch 817 818 cs := fs.Cap() 819 if err := cs.Err(); err != nil { 820 return xid, err 821 } 822 823 rns := xreg.RenewPutArchive(bckFrom, bckTo) 824 if rns.Err != nil { 825 nlog.Errorf("%s: %q %+v %v", t, c.uuid, archMsg, rns.Err) 826 return xid, rns.Err 827 } 828 xctn := rns.Entry.Get() 829 xid = xctn.ID() 830 831 xarch := xctn.(*xs.XactArch) 832 // finalize the message and begin local transaction 833 archMsg.TxnUUID = c.uuid 834 archMsg.FromBckName = bckFrom.Name 835 archlom := core.AllocLOM(archMsg.ArchName) 836 if err := xarch.Begin(archMsg, archlom); err != nil { 837 core.FreeLOM(archlom) // otherwise is freed by x-archive 838 return xid, err 839 } 840 txn := newTxnArchMultiObj(c, bckFrom, xarch, archMsg) 841 if err := t.transactions.begin(txn); err != nil { 842 return xid, err 843 } 844 case apc.ActAbort: 845 txn, err := t.transactions.find(c.uuid, apc.ActAbort) 846 if err == nil { 847 txnArch := txn.(*txnArchMultiObj) 848 // if _this_ transaction initiated _that_ on-demand 849 if xarch := txnArch.xarch; xarch != nil && xarch.ID() == c.uuid { 850 xid = xarch.ID() 851 xarch.Abort(nil) 852 } 853 } 854 case apc.ActCommit: 855 if err := c.bck.Init(t.owner.bmd); err != nil { 856 return xid, err 857 } 858 txn, err := t.transactions.find(c.uuid, "") 859 if err != nil { 860 return xid, err 861 } 862 txnArch := txn.(*txnArchMultiObj) 863 txnArch.xarch.Do(txnArch.msg) 864 xid = txnArch.xarch.ID() 865 t.transactions.find(c.uuid, apc.ActCommit) 866 } 867 return xid, nil 868 } 869 870 // 871 // begin (maintenance -- decommission -- shutdown) via p.beginRmTarget 872 // 873 874 func (t *target) beginRm(c *txnSrv) error { 875 var opts apc.ActValRmNode 876 if c.phase != apc.ActBegin { 877 return fmt.Errorf("%s: expecting begin phase, got %q", t, c.phase) 878 } 879 if err := cos.MorphMarshal(c.msg.Value, &opts); err != nil { 880 return fmt.Errorf(cmn.FmtErrMorphUnmarshal, t, c.msg.Action, c.msg.Value, err) 881 } 882 return xreg.LimitedCoexistence(t.si, nil, c.msg.Action) 883 } 884 885 // 886 // destroy local bucket / evict cloud bucket 887 // 888 889 func (t *target) destroyBucket(c *txnSrv) error { 890 switch c.phase { 891 case apc.ActBegin: 892 nlp := newBckNLP(c.bck) 893 if !nlp.TryLock(c.timeout.netw / 2) { 894 return cmn.NewErrBusy("bucket", c.bck.Cname("")) 895 } 896 txn := newTxnBckBase(c.bck) 897 txn.fillFromCtx(c) 898 if err := t.transactions.begin(txn, nlp); err != nil { 899 return err 900 } 901 case apc.ActAbort: 902 t.transactions.find(c.uuid, apc.ActAbort) 903 case apc.ActCommit: 904 t._commitCreateDestroy(c) 905 default: 906 debug.Assert(false) 907 } 908 return nil 909 } 910 911 func (t *target) promote(c *txnSrv, hdr http.Header) (string, error) { 912 switch c.phase { 913 case apc.ActBegin: 914 if err := c.bck.Init(t.owner.bmd); err != nil { 915 return "", err 916 } 917 cs := fs.Cap() 918 if err := cs.Err(); err != nil { 919 return "", err 920 } 921 prmMsg := &apc.PromoteArgs{} 922 if err := cos.MorphMarshal(c.msg.Value, prmMsg); err != nil { 923 err = fmt.Errorf(cmn.FmtErrMorphUnmarshal, t, c.msg.Action, c.msg.Value, err) 924 return "", err 925 } 926 if strings.Contains(prmMsg.ObjName, "../") || strings.Contains(prmMsg.ObjName, "~/") { 927 return "", fmt.Errorf("invalid object name or prefix %q", prmMsg.ObjName) 928 } 929 srcFQN := c.msg.Name 930 finfo, err := os.Stat(srcFQN) 931 if err != nil { 932 return "", err 933 } 934 if !finfo.IsDir() { 935 txn := newTxnPromote(c, prmMsg, []string{srcFQN}, "" /*dirFQN*/, 1) 936 if err := t.transactions.begin(txn); err != nil { 937 return "", err 938 } 939 hdr.Set(apc.HdrPromoteNamesNum, "1") 940 return "", nil 941 } 942 943 // directory 944 fqns, totalN, cksumVal, err := prmScan(srcFQN, prmMsg) 945 if totalN == 0 { 946 if err != nil { 947 return "", err 948 } 949 return "", fmt.Errorf("%s: directory %q is empty", t, srcFQN) 950 } 951 txn := newTxnPromote(c, prmMsg, fqns, srcFQN /*dir*/, totalN) 952 if err := t.transactions.begin(txn); err != nil { 953 return "", err 954 } 955 hdr.Set(apc.HdrPromoteNamesHash, cksumVal) 956 hdr.Set(apc.HdrPromoteNamesNum, strconv.Itoa(totalN)) 957 case apc.ActAbort: 958 t.transactions.find(c.uuid, apc.ActAbort) 959 case apc.ActCommit: 960 if err := c.bck.Init(t.owner.bmd); err != nil { 961 return "", err 962 } 963 txn, err := t.transactions.find(c.uuid, "") 964 if err != nil { 965 return "", err 966 } 967 txnPrm, ok := txn.(*txnPromote) 968 debug.Assert(ok) 969 defer t.transactions.find(c.uuid, apc.ActCommit) 970 971 if txnPrm.totalN == 0 { 972 nlog.Infof("%s: nothing to do (%s)", t, txnPrm) 973 return "", nil 974 } 975 // set by controlling proxy upon collecting and comparing all the begin-phase results 976 txnPrm.fshare = c.query.Get(apc.QparamConfirmFshare) != "" 977 978 // promote synchronously wo/ xaction; 979 // (set by proxy to eliminate any ambiguity vis-a-vis `promoteNumSync` special) 980 if noXact := c.query.Get(apc.QparamActNoXact) != ""; noXact { 981 nlog.Infof("%s: promote synchronously %s", t, txnPrm) 982 err := t.prmNumFiles(c, txnPrm, txnPrm.fshare) 983 return "", err 984 } 985 986 rns := xreg.RenewPromote(c.uuid, c.bck, txnPrm.msg) 987 if rns.Err != nil { 988 nlog.Errorf("%s: %s %v", t, txnPrm, rns.Err) 989 return "", rns.Err 990 } 991 xprm := rns.Entry.Get().(*xs.XactDirPromote) 992 xprm.SetFshare(txnPrm.fshare) 993 txnPrm.xprm = xprm 994 995 c.addNotif(xprm) // upon completion 996 xact.GoRunW(xprm) 997 return xprm.ID(), nil 998 default: 999 debug.Assert(false) 1000 } 1001 return "", nil 1002 } 1003 1004 // scan and, optionally, auto-detect file-share 1005 func prmScan(dirFQN string, prmMsg *apc.PromoteArgs) (fqns []string, totalN int, cksumVal string, err error) { 1006 var ( 1007 cksum *cos.CksumHash 1008 autoDetect = !prmMsg.SrcIsNotFshare || !cmn.Rom.Features().IsSet(feat.DontAutoDetectFshare) 1009 ) 1010 cb := func(fqn string, de fs.DirEntry) (err error) { 1011 if de.IsDir() { 1012 return 1013 } 1014 if len(fqns) == 0 { 1015 fqns = make([]string, 0, promoteNumSync) 1016 } 1017 if len(fqns) < promoteNumSync { 1018 fqns = append(fqns, fqn) 1019 } 1020 totalN++ 1021 if autoDetect { 1022 cksum.H.Write([]byte(fqn)) 1023 } 1024 return 1025 } 1026 if autoDetect { 1027 cksum = cos.NewCksumHash(cos.ChecksumXXHash) 1028 } 1029 if prmMsg.Recursive { 1030 opts := &fs.WalkOpts{Dir: dirFQN, Callback: cb, Sorted: true} 1031 err = fs.Walk(opts) 1032 } else { 1033 err = fs.WalkDir(dirFQN, cb) 1034 } 1035 1036 if err != nil || totalN == 0 || !autoDetect { 1037 return 1038 } 1039 cksum.Finalize() 1040 cksumVal = cksum.Value() 1041 return 1042 } 1043 1044 // synchronously wo/ xaction 1045 func (t *target) prmNumFiles(c *txnSrv, txnPrm *txnPromote, confirmedFshare bool) error { 1046 smap := t.owner.smap.Get() 1047 config := cmn.GCO.Get() 1048 for _, fqn := range txnPrm.fqns { 1049 objName, err := xs.PrmObjName(fqn, txnPrm.dirFQN, txnPrm.msg.ObjName) 1050 if err != nil { 1051 return err 1052 } 1053 // file share == true: promote only the part of the txnPrm.fqns that "lands" locally 1054 if confirmedFshare { 1055 si, err := smap.HrwName2T(c.bck.MakeUname(objName)) 1056 if err != nil { 1057 return err 1058 } 1059 if si.ID() != t.SID() { 1060 continue 1061 } 1062 } 1063 params := core.PromoteParams{ 1064 Bck: c.bck, 1065 Config: config, 1066 PromoteArgs: apc.PromoteArgs{ 1067 SrcFQN: fqn, 1068 ObjName: objName, 1069 OverwriteDst: txnPrm.msg.OverwriteDst, 1070 DeleteSrc: txnPrm.msg.DeleteSrc, 1071 }, 1072 } 1073 if _, err := t.Promote(¶ms); err != nil { 1074 return err 1075 } 1076 } 1077 return nil 1078 } 1079 1080 //////////// 1081 // txnSrv // 1082 //////////// 1083 1084 func (c *txnSrv) init(r *http.Request, bucket string) (err error) { 1085 c.callerName = r.Header.Get(apc.HdrCallerName) 1086 c.callerID = r.Header.Get(apc.HdrCallerID) 1087 1088 query := r.URL.Query() 1089 if bucket != "" { 1090 if c.bck, err = newBckFromQ(bucket, query, nil); err != nil { 1091 return err 1092 } 1093 } 1094 c.bckTo, err = newBckFromQuname(query, false /*required*/) 1095 if err != nil { 1096 return err 1097 } 1098 1099 // latency = (network) +- (clock drift) 1100 if c.phase == apc.ActBegin { 1101 if ptime := query.Get(apc.QparamUnixTime); ptime != "" { 1102 now := time.Now().UnixNano() 1103 dur := ptLatency(now, ptime, r.Header.Get(apc.HdrCallerIsPrimary)) 1104 lim := int64(cmn.Rom.CplaneOperation()) >> 1 1105 if dur > lim || dur < -lim { 1106 nlog.Errorf("Warning: clock drift %s <-> %s(self) = %v, txn %s[%s]", 1107 c.callerName, c.t, time.Duration(dur), c.msg.Action, c.msg.UUID) 1108 } 1109 } 1110 } 1111 1112 c.uuid = c.msg.UUID 1113 if c.uuid == "" { 1114 return nil 1115 } 1116 if tout := query.Get(apc.QparamNetwTimeout); tout != "" { 1117 c.timeout.netw, err = cos.S2Duration(tout) 1118 debug.AssertNoErr(err) 1119 } 1120 if tout := query.Get(apc.QparamHostTimeout); tout != "" { 1121 c.timeout.host, err = cos.S2Duration(tout) 1122 debug.AssertNoErr(err) 1123 } 1124 c.query = query // operation-specific values, if any 1125 return err 1126 } 1127 1128 func (c *txnSrv) addNotif(xctn core.Xact) { 1129 dsts, ok := c.query[apc.QparamNotifyMe] 1130 if !ok { 1131 return 1132 } 1133 xctn.AddNotif(&xact.NotifXact{ 1134 Base: nl.Base{When: core.UponTerm, Dsts: dsts, F: c.t.notifyTerm}, 1135 Xact: xctn, 1136 }) 1137 }