github.com/ergo-services/ergo@v1.999.224/gen/saga.go (about) 1 package gen 2 3 import ( 4 "fmt" 5 "math" 6 "sync" 7 "time" 8 9 "github.com/ergo-services/ergo/etf" 10 "github.com/ergo-services/ergo/lib" 11 ) 12 13 // SagaBehavior interface 14 type SagaBehavior interface { 15 ServerBehavior 16 17 // 18 // Mandatory callbacks 19 // 20 21 // InitSaga 22 InitSaga(process *SagaProcess, args ...etf.Term) (SagaOptions, error) 23 24 // HandleTxNew invokes on a new TX receiving by this saga. 25 HandleTxNew(process *SagaProcess, id SagaTransactionID, value interface{}) SagaStatus 26 27 // HandleTxResult invoked on a receiving result from the next saga 28 HandleTxResult(process *SagaProcess, id SagaTransactionID, from SagaNextID, result interface{}) SagaStatus 29 30 // HandleTxCancel invoked on a request of transaction cancelation. 31 HandleTxCancel(process *SagaProcess, id SagaTransactionID, reason string) SagaStatus 32 33 // 34 // Optional callbacks 35 // 36 37 // HandleTxDone invoked when the transaction is done on a saga where it was created. 38 // It returns the final result and SagaStatus. The commit message will deliver the final 39 // result to all participants of this transaction (if it has enabled the TwoPhaseCommit option). 40 // Otherwise the final result will be ignored. 41 HandleTxDone(process *SagaProcess, id SagaTransactionID, result interface{}) (interface{}, SagaStatus) 42 43 // HandleTxInterim invoked if received interim result from the next hop 44 HandleTxInterim(process *SagaProcess, id SagaTransactionID, from SagaNextID, interim interface{}) SagaStatus 45 46 // HandleTxCommit invoked if TwoPhaseCommit option is enabled for the given TX. 47 // All sagas involved in this TX receive a commit message with final value and invoke this callback. 48 // The final result has a value returned by HandleTxDone on a Saga created this TX. 49 HandleTxCommit(process *SagaProcess, id SagaTransactionID, final interface{}) SagaStatus 50 51 // 52 // Callbacks to handle result/interim from the worker(s) 53 // 54 55 // HandleJobResult 56 HandleJobResult(process *SagaProcess, id SagaTransactionID, from SagaJobID, result interface{}) SagaStatus 57 // HandleJobInterim 58 HandleJobInterim(process *SagaProcess, id SagaTransactionID, from SagaJobID, interim interface{}) SagaStatus 59 // HandleJobFailed 60 HandleJobFailed(process *SagaProcess, id SagaTransactionID, from SagaJobID, reason string) SagaStatus 61 62 // 63 // Server's callbacks 64 // 65 66 // HandleStageCall this callback is invoked on ServerProcess.Call. This method is optional 67 // for the implementation 68 HandleSagaCall(process *SagaProcess, from ServerFrom, message etf.Term) (etf.Term, ServerStatus) 69 // HandleStageCast this callback is invoked on ServerProcess.Cast. This method is optional 70 // for the implementation 71 HandleSagaCast(process *SagaProcess, message etf.Term) ServerStatus 72 // HandleStageInfo this callback is invoked on Process.Send. This method is optional 73 // for the implementation 74 HandleSagaInfo(process *SagaProcess, message etf.Term) ServerStatus 75 // HandleSagaDirect this callback is invoked on Process.Direct. This method is optional 76 // for the implementation 77 HandleSagaDirect(process *SagaProcess, ref etf.Ref, message interface{}) (interface{}, DirectStatus) 78 } 79 80 const ( 81 defaultHopLimit = math.MaxUint16 82 defaultLifespan = 60 83 ) 84 85 // SagaStatus 86 type SagaStatus error 87 88 var ( 89 SagaStatusOK SagaStatus // nil 90 SagaStatusStop SagaStatus = fmt.Errorf("stop") 91 92 // internal 93 94 ErrSagaTxEndOfLifespan = fmt.Errorf("End of TX lifespan") 95 ErrSagaTxNextTimeout = fmt.Errorf("Next saga timeout") 96 ErrSagaUnknown = fmt.Errorf("Unknown saga") 97 ErrSagaJobUnknown = fmt.Errorf("Unknown job") 98 ErrSagaTxUnknown = fmt.Errorf("Unknown TX") 99 ErrSagaTxCanceled = fmt.Errorf("Tx is canceled") 100 ErrSagaTxInProgress = fmt.Errorf("Tx is still in progress") 101 ErrSagaResultAlreadySent = fmt.Errorf("Result is already sent") 102 ErrSagaNotAllowed = fmt.Errorf("Operation is not allowed") 103 ) 104 105 // Saga 106 type Saga struct { 107 Server 108 } 109 110 // SagaTransactionOptions 111 type SagaTransactionOptions struct { 112 // HopLimit defines a number of hop within the transaction. Default limit 113 // is 0 (no limit). 114 HopLimit uint 115 // Lifespan defines a lifespan for the transaction in seconds. Default is 60. 116 Lifespan uint 117 118 // TwoPhaseCommit enables 2PC for the transaction. This option makes all 119 // Sagas involved in this transaction invoke HandleCommit callback on them and 120 // invoke HandleCommitJob callback on Worker processes once the transaction is finished. 121 TwoPhaseCommit bool 122 } 123 124 // SagaOptions 125 type SagaOptions struct { 126 // MaxTransactions defines the limit for the number of active transactions. Default: 0 (unlimited) 127 MaxTransactions uint 128 // Worker 129 Worker SagaWorkerBehavior 130 } 131 132 // SagaProcess 133 type SagaProcess struct { 134 ServerProcess 135 options SagaOptions 136 behavior SagaBehavior 137 138 // running transactions 139 txs map[SagaTransactionID]*SagaTransaction 140 mutexTXS sync.Mutex 141 142 // next sagas where txs were sent 143 next map[SagaNextID]*SagaTransaction 144 mutexNext sync.Mutex 145 146 // running jobs 147 jobs map[etf.Pid]*SagaJob 148 mutexJobs sync.Mutex 149 } 150 151 // SagaTransactionID 152 type SagaTransactionID etf.Ref 153 154 // String 155 func (id SagaTransactionID) String() string { 156 r := etf.Ref(id) 157 return fmt.Sprintf("TX#%d.%d.%d", r.ID[0], r.ID[1], r.ID[2]) 158 } 159 160 // SagaTransaction 161 type SagaTransaction struct { 162 sync.Mutex 163 id SagaTransactionID 164 options SagaTransactionOptions 165 origin SagaNextID // next id on a saga it came from 166 monitor etf.Ref // monitor parent saga 167 next map[SagaNextID]*SagaNext // where were sent 168 jobs map[SagaJobID]etf.Pid 169 arrival int64 // when it arrived on this saga 170 parents []etf.Pid // sagas trace 171 172 done bool // do not allow send result more than once if 2PC is set 173 cancelTimer CancelFunc 174 } 175 176 // SagaNextID 177 type SagaNextID etf.Ref 178 179 // String 180 func (id SagaNextID) String() string { 181 r := etf.Ref(id) 182 return fmt.Sprintf("Next#%d.%d.%d", r.ID[0], r.ID[1], r.ID[2]) 183 } 184 185 // SagaNext 186 type SagaNext struct { 187 // Saga etf.Pid, string (for the locally registered process), gen.ProcessID{process, node} (for the remote process) 188 Saga interface{} 189 // Value a value for the invoking HandleTxNew on a next hop. 190 Value interface{} 191 // Timeout how long this Saga will be waiting for the result from the next hop. Default - 10 seconds 192 Timeout uint 193 // TrapCancel if the next saga fails, it will transform the cancel signal into the regular message gen.MessageSagaCancel, and HandleSagaInfo callback will be invoked. 194 TrapCancel bool 195 196 // internal 197 done bool // for 2PC case 198 cancelTimer CancelFunc 199 } 200 201 // SagaJobID 202 type SagaJobID etf.Ref 203 204 // String 205 func (id SagaJobID) String() string { 206 r := etf.Ref(id) 207 return fmt.Sprintf("Job#%d.%d.%d", r.ID[0], r.ID[1], r.ID[2]) 208 } 209 210 // SagaJob 211 type SagaJob struct { 212 ID SagaJobID 213 TransactionID SagaTransactionID 214 Value interface{} 215 216 // internal 217 options SagaJobOptions 218 saga etf.Pid 219 commit bool 220 worker Process 221 done bool 222 cancelTimer CancelFunc 223 } 224 225 // SagaJobOptions 226 type SagaJobOptions struct { 227 Timeout uint 228 } 229 230 type messageSaga struct { 231 Request etf.Atom 232 Pid etf.Pid 233 Command interface{} 234 } 235 236 type messageSagaNext struct { 237 TransactionID etf.Ref 238 Origin etf.Ref 239 Value interface{} 240 Parents []etf.Pid 241 Options map[string]interface{} 242 } 243 244 type messageSagaResult struct { 245 TransactionID etf.Ref 246 Origin etf.Ref 247 Result interface{} 248 } 249 250 type messageSagaCancel struct { 251 TransactionID etf.Ref 252 Origin etf.Ref 253 Reason string 254 } 255 256 type messageSagaCommit struct { 257 TransactionID etf.Ref 258 Origin etf.Ref 259 Final interface{} 260 } 261 262 // MessageSagaCancel 263 type MessageSagaCancel struct { 264 TransactionID SagaTransactionID 265 NextID SagaNextID 266 Reason string 267 } 268 269 // MessageSagaError 270 type MessageSagaError struct { 271 TransactionID SagaTransactionID 272 NextID SagaNextID 273 Error string 274 Details string 275 } 276 277 // 278 // Saga API 279 // 280 281 type sagaSetMaxTransactions struct { 282 max uint 283 } 284 285 // SetMaxTransactions set maximum transactions fo the saga 286 func (gs *Saga) SetMaxTransactions(process Process, max uint) error { 287 if !process.IsAlive() { 288 return lib.ErrServerTerminated 289 } 290 message := sagaSetMaxTransactions{ 291 max: max, 292 } 293 _, err := process.Direct(message) 294 return err 295 } 296 297 // 298 // SagaProcess methods 299 // 300 301 // StartTransaction 302 func (sp *SagaProcess) StartTransaction(options SagaTransactionOptions, value interface{}) SagaTransactionID { 303 id := sp.MakeRef() 304 305 if options.HopLimit == 0 { 306 options.HopLimit = defaultHopLimit 307 } 308 if options.Lifespan == 0 { 309 options.Lifespan = defaultLifespan 310 } 311 312 message := etf.Tuple{ 313 etf.Atom("$saga_next"), 314 sp.Self(), 315 etf.Tuple{ 316 id, // tx id 317 etf.Ref{}, // origin. empty value. (parent's next id) 318 value, // tx value 319 []etf.Pid{}, // parents 320 etf.Map{ // tx options 321 "HopLimit": options.HopLimit, 322 "Lifespan": options.Lifespan, 323 "TwoPhaseCommit": options.TwoPhaseCommit, 324 }, 325 }, 326 } 327 328 sp.Send(sp.Self(), message) 329 return SagaTransactionID(id) 330 } 331 332 // Next 333 func (sp *SagaProcess) Next(id SagaTransactionID, next SagaNext) (SagaNextID, error) { 334 sp.mutexTXS.Lock() 335 tx, ok := sp.txs[id] 336 sp.mutexTXS.Unlock() 337 if !ok { 338 return SagaNextID{}, ErrSagaTxUnknown 339 } 340 341 if len(tx.next) > int(tx.options.HopLimit) { 342 return SagaNextID{}, fmt.Errorf("exceeded hop limit") 343 } 344 345 nextLifespan := int64(tx.options.Lifespan) - (time.Now().Unix() - tx.arrival) 346 if nextLifespan < 1 { 347 sp.CancelTransaction(id, "exceeded lifespan") 348 return SagaNextID{}, fmt.Errorf("exceeded lifespan. transaction canceled") 349 } 350 351 if next.Timeout > 0 && int64(next.Timeout) > nextLifespan { 352 return SagaNextID{}, fmt.Errorf("requested timeout exceed lifespan") 353 } 354 355 if next.Timeout > 0 { 356 nextLifespan = int64(next.Timeout) 357 } 358 359 ref := sp.MonitorProcess(next.Saga) 360 next_id := SagaNextID(ref) 361 message := etf.Tuple{ 362 etf.Atom("$saga_next"), 363 sp.Self(), 364 etf.Tuple{ 365 etf.Ref(tx.id), // tx id 366 ref, // next id (tx origin on the next saga) 367 next.Value, 368 tx.parents, 369 etf.Map{ 370 "HopLimit": tx.options.HopLimit, 371 "Lifespan": nextLifespan, 372 "TwoPhaseCommit": tx.options.TwoPhaseCommit, 373 }, 374 }, 375 } 376 377 sp.Send(next.Saga, message) 378 379 cancelMessage := etf.Tuple{ 380 etf.Atom("$saga_cancel"), 381 etf.Pid{}, // do not send sp.Self() to be able TrapCancel work 382 etf.Tuple{ 383 etf.Ref(tx.id), // tx id 384 ref, 385 "lifespan", 386 }, 387 } 388 timeout := time.Duration(nextLifespan) * time.Second 389 next.cancelTimer = sp.SendAfter(sp.Self(), cancelMessage, timeout) 390 391 tx.Lock() 392 tx.next[next_id] = &next 393 tx.Unlock() 394 395 sp.mutexNext.Lock() 396 sp.next[next_id] = tx 397 sp.mutexNext.Unlock() 398 399 return next_id, nil 400 } 401 402 // StartJob 403 func (sp *SagaProcess) StartJob(id SagaTransactionID, options SagaJobOptions, value interface{}) (SagaJobID, error) { 404 405 if sp.options.Worker == nil { 406 return SagaJobID{}, fmt.Errorf("This saga has no worker") 407 } 408 sp.mutexTXS.Lock() 409 tx, ok := sp.txs[id] 410 sp.mutexTXS.Unlock() 411 412 if !ok { 413 return SagaJobID{}, ErrSagaTxUnknown 414 } 415 416 jobLifespan := int64(tx.options.Lifespan) - (time.Now().Unix() - tx.arrival) 417 if options.Timeout > 0 && int64(options.Timeout) > jobLifespan { 418 return SagaJobID{}, fmt.Errorf("requested timeout exceed lifespan") 419 } 420 if options.Timeout > 0 { 421 jobLifespan = int64(options.Timeout) 422 } 423 424 workerOptions := ProcessOptions{} 425 worker, err := sp.Spawn("", workerOptions, sp.options.Worker) 426 if err != nil { 427 return SagaJobID{}, err 428 } 429 sp.Link(worker.Self()) 430 431 job := SagaJob{ 432 ID: SagaJobID(sp.MakeRef()), 433 TransactionID: id, 434 Value: value, 435 commit: tx.options.TwoPhaseCommit, 436 saga: sp.Self(), 437 worker: worker, 438 } 439 440 sp.mutexJobs.Lock() 441 sp.jobs[worker.Self()] = &job 442 sp.mutexJobs.Unlock() 443 444 m := messageSagaJobStart{ 445 job: job, 446 } 447 tx.Lock() 448 tx.jobs[job.ID] = worker.Self() 449 tx.Unlock() 450 451 sp.Cast(worker.Self(), m) 452 453 // terminate worker process via handleSagaExit 454 exitMessage := MessageExit{ 455 Pid: worker.Self(), 456 Reason: "lifespan", 457 } 458 459 timeout := time.Duration(jobLifespan) * time.Second 460 job.cancelTimer = sp.SendAfter(sp.Self(), exitMessage, timeout) 461 462 return job.ID, nil 463 } 464 465 // SendResult 466 func (sp *SagaProcess) SendResult(id SagaTransactionID, result interface{}) error { 467 sp.mutexTXS.Lock() 468 tx, ok := sp.txs[id] 469 sp.mutexTXS.Unlock() 470 if !ok { 471 return ErrSagaTxUnknown 472 } 473 474 if len(tx.parents) == 0 { 475 // SendResult was called right after CreateTransaction call. 476 return ErrSagaNotAllowed 477 } 478 479 if tx.done { 480 return ErrSagaResultAlreadySent 481 } 482 483 if sp.checkTxDone(tx) == false { 484 return ErrSagaTxInProgress 485 } 486 487 message := etf.Tuple{ 488 etf.Atom("$saga_result"), 489 sp.Self(), 490 etf.Tuple{ 491 etf.Ref(tx.id), 492 etf.Ref(tx.origin), 493 result, 494 }, 495 } 496 497 // send message to the parent saga 498 if err := sp.Send(tx.parents[0], message); err != nil { 499 return err 500 } 501 502 // tx handling is done on this saga 503 tx.done = true 504 505 // do not remove TX if we send result to itself 506 if tx.parents[0] == sp.Self() { 507 return nil 508 } 509 510 // do not remove TX if 2PC is enabled 511 if tx.options.TwoPhaseCommit { 512 return nil 513 } 514 515 sp.mutexTXS.Lock() 516 delete(sp.txs, id) 517 sp.mutexTXS.Unlock() 518 519 return nil 520 } 521 522 // SendInterim 523 func (sp *SagaProcess) SendInterim(id SagaTransactionID, interim interface{}) error { 524 sp.mutexTXS.Lock() 525 tx, ok := sp.txs[id] 526 sp.mutexTXS.Unlock() 527 if !ok { 528 return ErrSagaTxUnknown 529 } 530 531 message := etf.Tuple{ 532 etf.Atom("$saga_interim"), 533 sp.Self(), 534 etf.Tuple{ 535 etf.Ref(tx.id), 536 etf.Ref(tx.origin), 537 interim, 538 }, 539 } 540 541 // send message to the parent saga 542 if err := sp.Send(tx.parents[0], message); err != nil { 543 return err 544 } 545 546 return nil 547 } 548 549 // CancelTransaction 550 func (sp *SagaProcess) CancelTransaction(id SagaTransactionID, reason string) error { 551 sp.mutexTXS.Lock() 552 tx, ok := sp.txs[id] 553 sp.mutexTXS.Unlock() 554 if !ok { 555 return ErrSagaTxUnknown 556 } 557 558 message := etf.Tuple{ 559 etf.Atom("$saga_cancel"), 560 sp.Self(), 561 etf.Tuple{etf.Ref(tx.id), etf.Ref(tx.origin), reason}, 562 } 563 sp.Send(sp.Self(), message) 564 return nil 565 } 566 567 // CancelJob 568 func (sp *SagaProcess) CancelJob(id SagaTransactionID, job SagaJobID, reason string) error { 569 sp.mutexTXS.Lock() 570 tx, ok := sp.txs[id] 571 sp.mutexTXS.Unlock() 572 if !ok { 573 return ErrSagaTxUnknown 574 } 575 tx.Lock() 576 defer tx.Unlock() 577 return nil 578 } 579 580 func (sp *SagaProcess) checkTxDone(tx *SagaTransaction) bool { 581 if tx.options.TwoPhaseCommit == false { // 2PC is disabled 582 if len(tx.next) > 0 { // haven't received all results from the "next" sagas 583 return false 584 } 585 if len(tx.jobs) > 0 { // tx has running jobs 586 return false 587 } 588 return true 589 } 590 591 // 2PC is enabled. check whether received all results from sagas 592 // and workers have finished their jobs 593 594 tx.Lock() 595 // check results from sagas 596 for _, next := range tx.next { 597 if next.done == false { 598 tx.Unlock() 599 return false 600 } 601 } 602 603 if len(tx.jobs) == 0 { 604 tx.Unlock() 605 return true 606 } 607 608 // gen list of running workers 609 jobs := []etf.Pid{} 610 for _, pid := range tx.jobs { 611 jobs = append(jobs, pid) 612 } 613 tx.Unlock() 614 615 // check the job states of them 616 sp.mutexJobs.Lock() 617 for _, pid := range jobs { 618 job := sp.jobs[pid] 619 if job.done == false { 620 sp.mutexJobs.Unlock() 621 return false 622 } 623 } 624 sp.mutexJobs.Unlock() 625 return true 626 } 627 628 func (sp *SagaProcess) handleSagaRequest(m messageSaga) error { 629 630 switch m.Request { 631 case etf.Atom("$saga_next"): 632 nextMessage := messageSagaNext{} 633 634 if err := etf.TermIntoStruct(m.Command, &nextMessage); err != nil { 635 return lib.ErrUnsupportedRequest 636 } 637 638 // Check if exceed the number of transaction on this saga 639 if sp.options.MaxTransactions > 0 && len(sp.txs)+1 > int(sp.options.MaxTransactions) { 640 cancel := etf.Tuple{ 641 etf.Atom("$saga_cancel"), 642 sp.Self(), 643 etf.Tuple{ 644 nextMessage.TransactionID, 645 nextMessage.Origin, 646 "exceed_tx_limit", 647 }, 648 } 649 sp.Send(m.Pid, cancel) 650 return nil 651 } 652 653 // Check for the loop 654 transactionID := SagaTransactionID(nextMessage.TransactionID) 655 sp.mutexTXS.Lock() 656 tx, ok := sp.txs[transactionID] 657 sp.mutexTXS.Unlock() 658 if ok { 659 // loop detected. send cancel message 660 cancel := etf.Tuple{ 661 etf.Atom("$saga_cancel"), 662 sp.Self(), 663 etf.Tuple{ 664 nextMessage.TransactionID, 665 nextMessage.Origin, 666 "loop_detected", 667 }, 668 } 669 sp.Send(m.Pid, cancel) 670 return nil 671 } 672 673 txOptions := SagaTransactionOptions{ 674 HopLimit: defaultHopLimit, 675 Lifespan: defaultLifespan, 676 } 677 if value, ok := nextMessage.Options["HopLimit"]; ok { 678 if hoplimit, ok := value.(int64); ok { 679 txOptions.HopLimit = uint(hoplimit) 680 } 681 } 682 if value, ok := nextMessage.Options["Lifespan"]; ok { 683 if lifespan, ok := value.(int64); ok && lifespan > 0 { 684 txOptions.Lifespan = uint(lifespan) 685 } 686 } 687 if value, ok := nextMessage.Options["TwoPhaseCommit"]; ok { 688 txOptions.TwoPhaseCommit, _ = value.(bool) 689 } 690 691 tx = &SagaTransaction{ 692 id: transactionID, 693 options: txOptions, 694 origin: SagaNextID(nextMessage.Origin), 695 next: make(map[SagaNextID]*SagaNext), 696 jobs: make(map[SagaJobID]etf.Pid), 697 arrival: time.Now().Unix(), 698 parents: append([]etf.Pid{m.Pid}, nextMessage.Parents...), 699 } 700 sp.mutexTXS.Lock() 701 sp.txs[transactionID] = tx 702 sp.mutexTXS.Unlock() 703 704 // do not monitor itself (they are equal if its came from the StartTransaction call) 705 if m.Pid != sp.Self() { 706 tx.monitor = sp.MonitorProcess(m.Pid) 707 } 708 709 // tx lifespan timer 710 cancelMessage := etf.Tuple{ 711 etf.Atom("$saga_cancel"), 712 sp.Self(), // can't be trapped (ignored) 713 etf.Tuple{ 714 nextMessage.TransactionID, 715 nextMessage.Origin, 716 "lifespan", 717 }, 718 } 719 timeout := time.Duration(txOptions.Lifespan) * time.Second 720 tx.cancelTimer = sp.SendAfter(sp.Self(), cancelMessage, timeout) 721 722 return sp.behavior.HandleTxNew(sp, transactionID, nextMessage.Value) 723 724 case "$saga_cancel": 725 cancel := messageSagaCancel{} 726 if err := etf.TermIntoStruct(m.Command, &cancel); err != nil { 727 return lib.ErrUnsupportedRequest 728 } 729 730 tx, exist := sp.txs[SagaTransactionID(cancel.TransactionID)] 731 if !exist { 732 // unknown tx, just ignore it 733 return nil 734 } 735 736 // check where it came from. 737 if tx.parents[0] == m.Pid { 738 // came from parent saga or from itself via CancelTransaction 739 // can't be ignored 740 sp.cancelTX(m.Pid, cancel, tx) 741 return sp.behavior.HandleTxCancel(sp, tx.id, cancel.Reason) 742 } 743 744 // this cancel came from one of the next sagas 745 // or from itself (being in the middle of transaction graph) 746 next_id := SagaNextID(cancel.Origin) 747 tx.Lock() 748 next, ok := tx.next[next_id] 749 tx.Unlock() 750 751 if ok && next.TrapCancel { 752 // clean the next saga stuff 753 next.cancelTimer() 754 sp.DemonitorProcess(cancel.Origin) 755 tx.Lock() 756 delete(tx.next, next_id) 757 tx.Unlock() 758 sp.mutexNext.Lock() 759 delete(sp.next, next_id) 760 sp.mutexNext.Unlock() 761 762 // came from the next saga and TrapCancel was enabled 763 cm := MessageSagaCancel{ 764 TransactionID: tx.id, 765 NextID: next_id, 766 Reason: cancel.Reason, 767 } 768 sp.Send(sp.Self(), cm) 769 return SagaStatusOK 770 } 771 772 sp.cancelTX(m.Pid, cancel, tx) 773 return sp.behavior.HandleTxCancel(sp, tx.id, cancel.Reason) 774 775 case etf.Atom("$saga_result"): 776 result := messageSagaResult{} 777 if err := etf.TermIntoStruct(m.Command, &result); err != nil { 778 return lib.ErrUnsupportedRequest 779 } 780 781 transactionID := SagaTransactionID(result.TransactionID) 782 sp.mutexTXS.Lock() 783 tx, ok := sp.txs[transactionID] 784 sp.mutexTXS.Unlock() 785 if !ok { 786 // ignore unknown TX 787 return nil 788 } 789 790 next_id := SagaNextID(result.Origin) 791 empty_next_id := SagaNextID{} 792 // next id is empty if we got result on a saga created this TX 793 if next_id != empty_next_id { 794 sp.mutexNext.Lock() 795 _, ok := sp.next[next_id] 796 sp.mutexNext.Unlock() 797 if !ok { 798 // ignore unknown result 799 return nil 800 } 801 sp.mutexNext.Lock() 802 delete(sp.next, next_id) 803 sp.mutexNext.Unlock() 804 805 tx.Lock() 806 next := tx.next[next_id] 807 if tx.options.TwoPhaseCommit == false { 808 next.cancelTimer() 809 sp.DemonitorProcess(result.Origin) 810 delete(tx.next, next_id) 811 } else { 812 next.done = true 813 } 814 tx.Unlock() 815 816 return sp.behavior.HandleTxResult(sp, tx.id, next_id, result.Result) 817 } 818 819 final, status := sp.behavior.HandleTxDone(sp, tx.id, result.Result) 820 if status == SagaStatusOK { 821 sp.commitTX(tx, final) 822 } 823 824 return status 825 826 case etf.Atom("$saga_interim"): 827 interim := messageSagaResult{} 828 if err := etf.TermIntoStruct(m.Command, &interim); err != nil { 829 return lib.ErrUnsupportedRequest 830 } 831 next_id := SagaNextID(interim.Origin) 832 sp.mutexNext.Lock() 833 tx, ok := sp.next[next_id] 834 sp.mutexNext.Unlock() 835 if !ok { 836 // ignore unknown interim result and send cancel message to the sender 837 message := etf.Tuple{ 838 etf.Atom("$saga_cancel"), 839 sp.Self(), 840 etf.Tuple{ 841 interim.TransactionID, 842 interim.Origin, 843 "unknown or canceled tx", 844 }, 845 } 846 sp.Send(m.Pid, message) 847 return nil 848 } 849 return sp.behavior.HandleTxInterim(sp, tx.id, next_id, interim.Result) 850 851 case etf.Atom("$saga_commit"): 852 // propagate Commit signal if 2PC is enabled 853 commit := messageSagaCommit{} 854 if err := etf.TermIntoStruct(m.Command, &commit); err != nil { 855 return lib.ErrUnsupportedRequest 856 } 857 transactionID := SagaTransactionID(commit.TransactionID) 858 sp.mutexTXS.Lock() 859 tx, ok := sp.txs[transactionID] 860 sp.mutexTXS.Unlock() 861 if !ok { 862 // ignore unknown TX 863 return nil 864 } 865 // clean up and send commit message before we invoke callback 866 sp.commitTX(tx, commit.Final) 867 // make sure if 2PC was enabled on this TX 868 if tx.options.TwoPhaseCommit { 869 return sp.behavior.HandleTxCommit(sp, tx.id, commit.Final) 870 } 871 return SagaStatusOK 872 } 873 return lib.ErrUnsupportedRequest 874 } 875 876 func (sp *SagaProcess) cancelTX(from etf.Pid, cancel messageSagaCancel, tx *SagaTransaction) { 877 tx.cancelTimer() 878 879 // stop workers 880 tx.Lock() 881 cancelJobs := []etf.Pid{} 882 for _, pid := range tx.jobs { 883 sp.Unlink(pid) 884 sp.Cast(pid, messageSagaJobCancel{reason: cancel.Reason}) 885 cancelJobs = append(cancelJobs, pid) 886 } 887 tx.Unlock() 888 889 sp.mutexJobs.Lock() 890 for i := range cancelJobs { 891 job, ok := sp.jobs[cancelJobs[i]] 892 if ok { 893 delete(sp.jobs, cancelJobs[i]) 894 job.cancelTimer() 895 } 896 } 897 sp.mutexJobs.Unlock() 898 899 // remove monitor from parent saga 900 if tx.parents[0] != sp.Self() { 901 sp.DemonitorProcess(tx.monitor) 902 903 // do not send to the parent saga if it came from there 904 // and cancelation reason caused by lifespan timer 905 if tx.parents[0] != from && cancel.Reason != "lifespan" { 906 cm := etf.Tuple{ 907 etf.Atom("$saga_cancel"), 908 sp.Self(), 909 etf.Tuple{ 910 cancel.TransactionID, 911 etf.Ref(tx.origin), 912 cancel.Reason, 913 }, 914 } 915 sp.Send(tx.parents[0], cm) 916 } 917 } 918 919 // send cancel to all next sagas except the saga this cancel came from 920 sp.mutexNext.Lock() 921 for nxtid, nxt := range tx.next { 922 ref := etf.Ref(nxtid) 923 // remove monitor from the next saga 924 sp.DemonitorProcess(ref) 925 delete(sp.next, nxtid) 926 nxt.cancelTimer() 927 928 if cancel.Reason == "lifespan" { 929 // do not send if the cancelation caused by lifespan timer 930 continue 931 } 932 if ref == cancel.Origin { 933 // do not send to the parent if it came from there 934 continue 935 } 936 937 cm := etf.Tuple{ 938 etf.Atom("$saga_cancel"), 939 sp.Self(), 940 etf.Tuple{ 941 cancel.TransactionID, 942 ref, 943 cancel.Reason, 944 }, 945 } 946 if err := sp.Send(nxt.Saga, cm); err != nil { 947 errmessage := MessageSagaError{ 948 TransactionID: tx.id, 949 NextID: nxtid, 950 Error: "can't send cancel message", 951 Details: err.Error(), 952 } 953 sp.Send(sp.Self(), errmessage) 954 } 955 } 956 sp.mutexNext.Unlock() 957 958 // remove tx from this saga 959 sp.mutexTXS.Lock() 960 delete(sp.txs, tx.id) 961 sp.mutexTXS.Unlock() 962 } 963 964 func (sp *SagaProcess) commitTX(tx *SagaTransaction, final interface{}) { 965 tx.cancelTimer() 966 // remove tx from this saga 967 sp.mutexTXS.Lock() 968 delete(sp.txs, tx.id) 969 sp.mutexTXS.Unlock() 970 971 // send commit message to all workers 972 for _, pid := range tx.jobs { 973 // unlink before this worker stopped 974 sp.Unlink(pid) 975 // do nothing if 2PC option is disabled 976 if tx.options.TwoPhaseCommit == false { 977 continue 978 } 979 // send commit message 980 sp.Cast(pid, messageSagaJobCommit{final: final}) 981 } 982 // remove monitor from parent saga 983 sp.DemonitorProcess(tx.monitor) 984 985 sp.mutexNext.Lock() 986 for nxtid, nxt := range tx.next { 987 ref := etf.Ref(nxtid) 988 // remove monitor from the next saga 989 sp.DemonitorProcess(ref) 990 991 delete(sp.next, nxtid) 992 nxt.cancelTimer() 993 // send commit message 994 if tx.options.TwoPhaseCommit == false { 995 continue 996 } 997 cm := etf.Tuple{ 998 etf.Atom("$saga_commit"), 999 sp.Self(), 1000 etf.Tuple{ 1001 etf.Ref(tx.id), // tx id 1002 ref, // origin (next_id) 1003 final, // final result 1004 }, 1005 } 1006 if err := sp.Send(nxt.Saga, cm); err != nil { 1007 errmessage := MessageSagaError{ 1008 TransactionID: tx.id, 1009 NextID: nxtid, 1010 Error: "can't send commit message", 1011 Details: err.Error(), 1012 } 1013 sp.Send(sp.Self(), errmessage) 1014 } 1015 } 1016 sp.mutexNext.Unlock() 1017 1018 } 1019 1020 func (sp *SagaProcess) handleSagaExit(exit MessageExit) error { 1021 sp.mutexJobs.Lock() 1022 job, ok := sp.jobs[exit.Pid] 1023 sp.mutexJobs.Unlock() 1024 if !ok { 1025 // passthrough this message to HandleSagaInfo callback 1026 return ErrSagaJobUnknown 1027 } 1028 1029 if exit.Reason == "lifespan" { 1030 sp.Unlink(job.worker.Self()) 1031 job.worker.Exit(exit.Reason) 1032 } else { 1033 job.cancelTimer() 1034 } 1035 1036 // remove it from saga job list 1037 sp.mutexJobs.Lock() 1038 delete(sp.jobs, exit.Pid) 1039 sp.mutexJobs.Unlock() 1040 1041 // check if this tx is still alive 1042 sp.mutexTXS.Lock() 1043 tx, ok := sp.txs[job.TransactionID] 1044 sp.mutexTXS.Unlock() 1045 if !ok { 1046 // seems it was already canceled 1047 return SagaStatusOK 1048 } 1049 1050 // remove it from the tx job list 1051 tx.Lock() 1052 delete(tx.jobs, job.ID) 1053 tx.Unlock() 1054 1055 // if this job is done, don't care about the termination reason 1056 if job.done { 1057 return SagaStatusOK 1058 } 1059 1060 if exit.Reason != "normal" { 1061 return sp.behavior.HandleJobFailed(sp, job.TransactionID, job.ID, exit.Reason) 1062 } 1063 1064 // seems no result received from this worker 1065 return sp.behavior.HandleJobFailed(sp, job.TransactionID, job.ID, "no result") 1066 } 1067 1068 func (sp *SagaProcess) handleSagaDown(down MessageDown) error { 1069 1070 sp.mutexNext.Lock() 1071 tx, ok := sp.next[SagaNextID(down.Ref)] 1072 sp.mutexNext.Unlock() 1073 if ok { 1074 // got DOWN message from the next saga 1075 empty := etf.Pid{} 1076 reason := fmt.Sprintf("next saga %s is down", down.Pid) 1077 if down.Pid == empty { 1078 // monitored by name 1079 reason = fmt.Sprintf("next saga %s is down", down.ProcessID) 1080 } 1081 message := etf.Tuple{ 1082 etf.Atom("$saga_cancel"), 1083 down.Pid, 1084 etf.Tuple{etf.Ref(tx.id), down.Ref, reason}, 1085 } 1086 sp.Send(sp.Self(), message) 1087 return nil 1088 } 1089 1090 sp.mutexTXS.Lock() 1091 for _, tx := range sp.txs { 1092 if down.Ref != tx.monitor { 1093 continue 1094 } 1095 1096 // got DOWN message from the parent saga 1097 reason := fmt.Sprintf("parent saga %s is down", down.Pid) 1098 message := etf.Tuple{ 1099 etf.Atom("$saga_cancel"), 1100 down.Pid, 1101 etf.Tuple{etf.Ref(tx.id), down.Ref, reason}, 1102 } 1103 sp.Send(sp.Self(), message) 1104 sp.mutexTXS.Unlock() 1105 return nil 1106 } 1107 sp.mutexTXS.Unlock() 1108 1109 // down.Ref is unknown. Return ErrSagaUnknown to passthrough 1110 // this message to HandleSagaInfo callback 1111 return ErrSagaUnknown 1112 } 1113 1114 // 1115 // Server callbacks 1116 // 1117 1118 // Init 1119 func (gs *Saga) Init(process *ServerProcess, args ...etf.Term) error { 1120 var options SagaOptions 1121 1122 behavior, ok := process.Behavior().(SagaBehavior) 1123 if !ok { 1124 return fmt.Errorf("Saga: not a SagaBehavior") 1125 } 1126 1127 sagaProcess := &SagaProcess{ 1128 ServerProcess: *process, 1129 txs: make(map[SagaTransactionID]*SagaTransaction), 1130 next: make(map[SagaNextID]*SagaTransaction), 1131 behavior: behavior, 1132 } 1133 // do not inherit parent State 1134 sagaProcess.State = nil 1135 1136 options, err := behavior.InitSaga(sagaProcess, args...) 1137 if err != nil { 1138 return err 1139 } 1140 1141 sagaProcess.options = options 1142 process.State = sagaProcess 1143 1144 if options.Worker != nil { 1145 sagaProcess.jobs = make(map[etf.Pid]*SagaJob) 1146 } 1147 1148 process.SetTrapExit(true) 1149 1150 return nil 1151 } 1152 1153 // HandleCall 1154 func (gs *Saga) HandleCall(process *ServerProcess, from ServerFrom, message etf.Term) (etf.Term, ServerStatus) { 1155 sp := process.State.(*SagaProcess) 1156 return sp.behavior.HandleSagaCall(sp, from, message) 1157 } 1158 1159 // HandleDirect 1160 func (gs *Saga) HandleDirect(process *ServerProcess, ref etf.Ref, message interface{}) (interface{}, DirectStatus) { 1161 sp := process.State.(*SagaProcess) 1162 switch m := message.(type) { 1163 case sagaSetMaxTransactions: 1164 sp.options.MaxTransactions = m.max 1165 return nil, DirectStatusOK 1166 default: 1167 return sp.behavior.HandleSagaDirect(sp, ref, message) 1168 } 1169 } 1170 1171 // HandleCast 1172 func (gs *Saga) HandleCast(process *ServerProcess, message etf.Term) ServerStatus { 1173 var status SagaStatus 1174 1175 sp := process.State.(*SagaProcess) 1176 1177 switch m := message.(type) { 1178 case messageSagaJobResult: 1179 sp.mutexJobs.Lock() 1180 job, ok := sp.jobs[m.pid] 1181 sp.mutexJobs.Unlock() 1182 if !ok { 1183 // kill this process 1184 if worker := process.ProcessByPid(m.pid); worker != nil { 1185 process.Unlink(worker.Self()) 1186 worker.Kill() 1187 } 1188 status = SagaStatusOK 1189 break 1190 } 1191 job.done = true 1192 job.cancelTimer() 1193 1194 sp.mutexTXS.Lock() 1195 tx, ok := sp.txs[job.TransactionID] 1196 sp.mutexTXS.Unlock() 1197 1198 if !ok { 1199 // tx is already canceled. kill this worker if its still alive (tx might have had 1200 // 2PC enabled, and the worker is waiting for the commit message) 1201 process.Unlink(job.worker.Self()) 1202 job.worker.Kill() 1203 status = SagaStatusOK 1204 break 1205 } 1206 1207 // remove this job from the tx job list, but do not remove 1208 // from the sp.jobs (will be removed once worker terminated) 1209 if tx.options.TwoPhaseCommit == false { 1210 tx.Lock() 1211 delete(tx.jobs, job.ID) 1212 tx.Unlock() 1213 } 1214 1215 status = sp.behavior.HandleJobResult(sp, job.TransactionID, job.ID, m.result) 1216 1217 case messageSagaJobInterim: 1218 sp.mutexJobs.Lock() 1219 job, ok := sp.jobs[m.pid] 1220 sp.mutexJobs.Unlock() 1221 if !ok { 1222 // kill this process 1223 if worker := process.ProcessByPid(m.pid); worker != nil { 1224 process.Unlink(worker.Self()) 1225 worker.Kill() 1226 } 1227 // tx was canceled. just ignore it 1228 status = SagaStatusOK 1229 break 1230 } 1231 status = sp.behavior.HandleJobInterim(sp, job.TransactionID, job.ID, m.interim) 1232 1233 default: 1234 status = sp.behavior.HandleSagaCast(sp, message) 1235 } 1236 1237 switch status { 1238 case SagaStatusOK: 1239 return ServerStatusOK 1240 case SagaStatusStop: 1241 return ServerStatusStop 1242 default: 1243 return ServerStatus(status) 1244 } 1245 } 1246 1247 // HandleInfo 1248 func (gs *Saga) HandleInfo(process *ServerProcess, message etf.Term) ServerStatus { 1249 var mSaga messageSaga 1250 1251 sp := process.State.(*SagaProcess) 1252 switch m := message.(type) { 1253 case MessageExit: 1254 // handle worker exit message 1255 err := sp.handleSagaExit(m) 1256 if err == ErrSagaJobUnknown { 1257 return sp.behavior.HandleSagaInfo(sp, m) 1258 } 1259 return ServerStatus(err) 1260 1261 case MessageDown: 1262 // handle saga's down message 1263 err := sp.handleSagaDown(m) 1264 if err == ErrSagaUnknown { 1265 return sp.behavior.HandleSagaInfo(sp, m) 1266 } 1267 return ServerStatus(err) 1268 } 1269 1270 if err := etf.TermIntoStruct(message, &mSaga); err != nil { 1271 return sp.behavior.HandleSagaInfo(sp, message) 1272 } 1273 1274 status := sp.handleSagaRequest(mSaga) 1275 switch status { 1276 case nil, SagaStatusOK: 1277 return ServerStatusOK 1278 case SagaStatusStop: 1279 return ServerStatusStop 1280 case lib.ErrUnsupportedRequest: 1281 return sp.behavior.HandleSagaInfo(sp, message) 1282 default: 1283 return ServerStatus(status) 1284 } 1285 } 1286 1287 // 1288 // default Saga callbacks 1289 // 1290 1291 // HandleTxInterim 1292 func (gs *Saga) HandleTxInterim(process *SagaProcess, id SagaTransactionID, from SagaNextID, interim interface{}) SagaStatus { 1293 lib.Warning("HandleTxInterim: [%v %v] unhandled message %#v", id, from, interim) 1294 return ServerStatusOK 1295 } 1296 1297 // HandleTxCommit 1298 func (gs *Saga) HandleTxCommit(process *SagaProcess, id SagaTransactionID, final interface{}) SagaStatus { 1299 lib.Warning("HandleTxCommit: [%v] unhandled message", id) 1300 return ServerStatusOK 1301 } 1302 1303 // HandleTxDone 1304 func (gs *Saga) HandleTxDone(process *SagaProcess, id SagaTransactionID, result interface{}) (interface{}, SagaStatus) { 1305 return nil, fmt.Errorf("Saga [%v:%v] has no implementation of HandleTxDone method", process.Self(), process.Name()) 1306 } 1307 1308 // HandleSagaCall 1309 func (gs *Saga) HandleSagaCall(process *SagaProcess, from ServerFrom, message etf.Term) (etf.Term, ServerStatus) { 1310 lib.Warning("HandleSagaCall: unhandled message (from %#v) %#v", from, message) 1311 return etf.Atom("ok"), ServerStatusOK 1312 } 1313 1314 // HandleSagaCast 1315 func (gs *Saga) HandleSagaCast(process *SagaProcess, message etf.Term) ServerStatus { 1316 lib.Warning("HandleSagaCast: unhandled message %#v", message) 1317 return ServerStatusOK 1318 } 1319 1320 // HandleSagaInfo 1321 func (gs *Saga) HandleSagaInfo(process *SagaProcess, message etf.Term) ServerStatus { 1322 lib.Warning("HandleSagaInfo: unhandled message %#v", message) 1323 return ServerStatusOK 1324 } 1325 1326 // HandleSagaDirect 1327 func (gs *Saga) HandleSagaDirect(process *SagaProcess, ref etf.Ref, message interface{}) (interface{}, DirectStatus) { 1328 return nil, lib.ErrUnsupportedRequest 1329 } 1330 1331 // HandleJobResult 1332 func (gs *Saga) HandleJobResult(process *SagaProcess, id SagaTransactionID, from SagaJobID, result interface{}) SagaStatus { 1333 lib.Warning("HandleJobResult: [%v %v] unhandled message %#v", id, from, result) 1334 return SagaStatusOK 1335 } 1336 1337 // HandleJobInterim 1338 func (gs *Saga) HandleJobInterim(process *SagaProcess, id SagaTransactionID, from SagaJobID, interim interface{}) SagaStatus { 1339 lib.Warning("HandleJobInterim: [%v %v] unhandled message %#v", id, from, interim) 1340 return SagaStatusOK 1341 } 1342 1343 // HandleJobFailed 1344 func (gs *Saga) HandleJobFailed(process *SagaProcess, id SagaTransactionID, from SagaJobID, reason string) SagaStatus { 1345 lib.Warning("HandleJobFailed: [%v %v] unhandled message. reason %q", id, from, reason) 1346 return nil 1347 }