github.com/decred/dcrlnd@v0.7.6/routing/payment_lifecycle.go (about) 1 package routing 2 3 import ( 4 "fmt" 5 "sync" 6 "time" 7 8 "github.com/davecgh/go-spew/spew" 9 "github.com/decred/dcrd/dcrec/secp256k1/v4" 10 "github.com/decred/dcrlnd/channeldb" 11 "github.com/decred/dcrlnd/htlcswitch" 12 "github.com/decred/dcrlnd/lntypes" 13 "github.com/decred/dcrlnd/lnwire" 14 "github.com/decred/dcrlnd/routing/route" 15 "github.com/decred/dcrlnd/routing/shards" 16 sphinx "github.com/decred/lightning-onion/v4" 17 ) 18 19 // errShardHandlerExiting is returned from the shardHandler when it exits. 20 var errShardHandlerExiting = fmt.Errorf("shard handler exiting") 21 22 // paymentLifecycle holds all information about the current state of a payment 23 // needed to resume if from any point. 24 type paymentLifecycle struct { 25 router *ChannelRouter 26 totalAmount lnwire.MilliAtom 27 feeLimit lnwire.MilliAtom 28 identifier lntypes.Hash 29 paySession PaymentSession 30 shardTracker shards.ShardTracker 31 timeoutChan <-chan time.Time 32 currentHeight int32 33 } 34 35 // payemntState holds a number of key insights learned from a given MPPayment 36 // that we use to determine what to do on each payment loop iteration. 37 type paymentState struct { 38 numShardsInFlight int 39 remainingAmt lnwire.MilliAtom 40 remainingFees lnwire.MilliAtom 41 42 // terminate indicates the payment is in its final stage and no more 43 // shards should be launched. This value is true if we have an HTLC 44 // settled or the payment has an error. 45 terminate bool 46 } 47 48 // terminated returns a bool to indicate there are no further actions needed 49 // and we should return what we have, either the payment preimage or the 50 // payment error. 51 func (ps paymentState) terminated() bool { 52 // If the payment is in final stage and we have no in flight shards to 53 // wait result for, we consider the whole action terminated. 54 return ps.terminate && ps.numShardsInFlight == 0 55 } 56 57 // needWaitForShards returns a bool to specify whether we need to wait for the 58 // outcome of the shanrdHandler. 59 func (ps paymentState) needWaitForShards() bool { 60 // If we have in flight shards and the payment is in final stage, we 61 // need to wait for the outcomes from the shards. Or if we have no more 62 // money to be sent, we need to wait for the already launched shards. 63 if ps.numShardsInFlight == 0 { 64 return false 65 } 66 return ps.terminate || ps.remainingAmt == 0 67 } 68 69 // fetchPaymentState will query the db for the latest payment state 70 // information we need to act on every iteration of the payment loop and update 71 // the paymentState. 72 func (p *paymentLifecycle) fetchPaymentState() (*channeldb.MPPayment, 73 *paymentState, error) { 74 75 // Fetch the latest payment from db. 76 payment, err := p.router.cfg.Control.FetchPayment(p.identifier) 77 if err != nil { 78 return nil, nil, err 79 } 80 81 // Fetch the total amount and fees that has already been sent in 82 // settled and still in-flight shards. 83 sentAmt, fees := payment.SentAmt() 84 85 // Sanity check we haven't sent a value larger than the payment amount. 86 if sentAmt > p.totalAmount { 87 return nil, nil, fmt.Errorf("amount sent %v exceeds "+ 88 "total amount %v", sentAmt, p.totalAmount) 89 } 90 91 // We'll subtract the used fee from our fee budget, but allow the fees 92 // of the already sent shards to exceed our budget (can happen after 93 // restarts). 94 feeBudget := p.feeLimit 95 if fees <= feeBudget { 96 feeBudget -= fees 97 } else { 98 feeBudget = 0 99 } 100 101 // Get any terminal info for this payment. 102 settle, failure := payment.TerminalInfo() 103 104 // If either an HTLC settled, or the payment has a payment level 105 // failure recorded, it means we should terminate the moment all shards 106 // have returned with a result. 107 terminate := settle != nil || failure != nil 108 109 // Update the payment state. 110 state := &paymentState{ 111 numShardsInFlight: len(payment.InFlightHTLCs()), 112 remainingAmt: p.totalAmount - sentAmt, 113 remainingFees: feeBudget, 114 terminate: terminate, 115 } 116 117 return payment, state, nil 118 } 119 120 // resumePayment resumes the paymentLifecycle from the current state. 121 func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) { 122 shardHandler := &shardHandler{ 123 router: p.router, 124 identifier: p.identifier, 125 shardTracker: p.shardTracker, 126 shardErrors: make(chan error), 127 quit: make(chan struct{}), 128 paySession: p.paySession, 129 } 130 131 // When the payment lifecycle loop exits, we make sure to signal any 132 // sub goroutine of the shardHandler to exit, then wait for them to 133 // return. 134 defer shardHandler.stop() 135 136 // If we had any existing attempts outstanding, we'll start by spinning 137 // up goroutines that'll collect their results and deliver them to the 138 // lifecycle loop below. 139 payment, _, err := p.fetchPaymentState() 140 if err != nil { 141 return [32]byte{}, nil, err 142 } 143 144 for _, a := range payment.InFlightHTLCs() { 145 a := a 146 147 log.Infof("Resuming payment shard %v for payment %v", 148 a.AttemptID, p.identifier) 149 150 shardHandler.collectResultAsync(&a.HTLCAttemptInfo) 151 } 152 153 // We'll continue until either our payment succeeds, or we encounter a 154 // critical error during path finding. 155 lifecycle: 156 for { 157 // Start by quickly checking if there are any outcomes already 158 // available to handle before we reevaluate our state. 159 if err := shardHandler.checkShards(); err != nil { 160 return [32]byte{}, nil, err 161 } 162 163 // We update the payment state on every iteration. Since the 164 // payment state is affected by multiple goroutines (ie, 165 // collectResultAsync), it is NOT guaranteed that we always 166 // have the latest state here. This is fine as long as the 167 // state is consistent as a whole. 168 payment, currentState, err := p.fetchPaymentState() 169 if err != nil { 170 return [32]byte{}, nil, err 171 } 172 173 log.Debugf("Payment %v in state terminate=%v, "+ 174 "active_shards=%v, rem_value=%v, fee_limit=%v", 175 p.identifier, currentState.terminate, 176 currentState.numShardsInFlight, 177 currentState.remainingAmt, currentState.remainingFees, 178 ) 179 180 // TODO(yy): sanity check all the states to make sure 181 // everything is expected. 182 switch { 183 184 // We have a terminal condition and no active shards, we are 185 // ready to exit. 186 case currentState.terminated(): 187 // Find the first successful shard and return 188 // the preimage and route. 189 for _, a := range payment.HTLCs { 190 if a.Settle != nil { 191 return a.Settle.Preimage, &a.Route, nil 192 } 193 } 194 195 // Payment failed. 196 return [32]byte{}, nil, *payment.FailureReason 197 198 // If we either reached a terminal error condition (but had 199 // active shards still) or there is no remaining value to send, 200 // we'll wait for a shard outcome. 201 case currentState.needWaitForShards(): 202 // We still have outstanding shards, so wait for a new 203 // outcome to be available before re-evaluating our 204 // state. 205 if err := shardHandler.waitForShard(); err != nil { 206 return [32]byte{}, nil, err 207 } 208 continue lifecycle 209 } 210 211 // Before we attempt any new shard, we'll check to see if 212 // either we've gone past the payment attempt timeout, or the 213 // router is exiting. In either case, we'll stop this payment 214 // attempt short. If a timeout is not applicable, timeoutChan 215 // will be nil. 216 select { 217 case <-p.timeoutChan: 218 log.Warnf("payment attempt not completed before " + 219 "timeout") 220 221 // By marking the payment failed with the control 222 // tower, no further shards will be launched and we'll 223 // return with an error the moment all active shards 224 // have finished. 225 saveErr := p.router.cfg.Control.Fail( 226 p.identifier, channeldb.FailureReasonTimeout, 227 ) 228 if saveErr != nil { 229 return [32]byte{}, nil, saveErr 230 } 231 232 continue lifecycle 233 234 case <-p.router.quit: 235 return [32]byte{}, nil, ErrRouterShuttingDown 236 237 // Fall through if we haven't hit our time limit. 238 default: 239 } 240 241 // Create a new payment attempt from the given payment session. 242 rt, err := p.paySession.RequestRoute( 243 currentState.remainingAmt, currentState.remainingFees, 244 uint32(currentState.numShardsInFlight), 245 uint32(p.currentHeight), 246 ) 247 if err != nil { 248 log.Warnf("Failed to find route for payment %v: %v", 249 p.identifier, err) 250 251 routeErr, ok := err.(noRouteError) 252 if !ok { 253 return [32]byte{}, nil, err 254 } 255 256 // There is no route to try, and we have no active 257 // shards. This means that there is no way for us to 258 // send the payment, so mark it failed with no route. 259 if currentState.numShardsInFlight == 0 { 260 failureCode := routeErr.FailureReason() 261 log.Debugf("Marking payment %v permanently "+ 262 "failed with no route: %v", 263 p.identifier, failureCode) 264 265 saveErr := p.router.cfg.Control.Fail( 266 p.identifier, failureCode, 267 ) 268 if saveErr != nil { 269 return [32]byte{}, nil, saveErr 270 } 271 272 continue lifecycle 273 } 274 275 // We still have active shards, we'll wait for an 276 // outcome to be available before retrying. 277 if err := shardHandler.waitForShard(); err != nil { 278 return [32]byte{}, nil, err 279 } 280 continue lifecycle 281 } 282 283 // If this route will consume the last remeining amount to send 284 // to the receiver, this will be our last shard (for now). 285 lastShard := rt.ReceiverAmt() == currentState.remainingAmt 286 287 // We found a route to try, launch a new shard. 288 attempt, outcome, err := shardHandler.launchShard(rt, lastShard) 289 switch { 290 // We may get a terminal error if we've processed a shard with 291 // a terminal state (settled or permanent failure), while we 292 // were pathfinding. We know we're in a terminal state here, 293 // so we can continue and wait for our last shards to return. 294 case err == channeldb.ErrPaymentTerminal: 295 log.Infof("Payment %v in terminal state, abandoning "+ 296 "shard", p.identifier) 297 298 continue lifecycle 299 300 case err != nil: 301 return [32]byte{}, nil, err 302 } 303 304 // If we encountered a non-critical error when launching the 305 // shard, handle it. 306 if outcome.err != nil { 307 log.Warnf("Failed to launch shard %v for "+ 308 "payment %v: %v", attempt.AttemptID, 309 p.identifier, outcome.err) 310 311 // We must inspect the error to know whether it was 312 // critical or not, to decide whether we should 313 // continue trying. 314 err := shardHandler.handleSendError( 315 attempt, outcome.err, 316 ) 317 if err != nil { 318 return [32]byte{}, nil, err 319 } 320 321 // Error was handled successfully, continue to make a 322 // new attempt. 323 continue lifecycle 324 } 325 326 // Now that the shard was successfully sent, launch a go 327 // routine that will handle its result when its back. 328 shardHandler.collectResultAsync(attempt) 329 330 } 331 } 332 333 // shardHandler holds what is necessary to send and collect the result of 334 // shards. 335 type shardHandler struct { 336 identifier lntypes.Hash 337 router *ChannelRouter 338 shardTracker shards.ShardTracker 339 paySession PaymentSession 340 341 // shardErrors is a channel where errors collected by calling 342 // collectResultAsync will be delivered. These results are meant to be 343 // inspected by calling waitForShard or checkShards, and the channel 344 // doesn't need to be initiated if the caller is using the sync 345 // collectResult directly. 346 shardErrors chan error 347 348 // quit is closed to signal the sub goroutines of the payment lifecycle 349 // to stop. 350 quit chan struct{} 351 wg sync.WaitGroup 352 } 353 354 // stop signals any active shard goroutine to exit and waits for them to exit. 355 func (p *shardHandler) stop() { 356 close(p.quit) 357 p.wg.Wait() 358 } 359 360 // waitForShard blocks until any of the outstanding shards return. 361 func (p *shardHandler) waitForShard() error { 362 select { 363 case err := <-p.shardErrors: 364 return err 365 366 case <-p.quit: 367 return errShardHandlerExiting 368 369 case <-p.router.quit: 370 return ErrRouterShuttingDown 371 } 372 } 373 374 // checkShards is a non-blocking method that check if any shards has finished 375 // their execution. 376 func (p *shardHandler) checkShards() error { 377 for { 378 select { 379 case err := <-p.shardErrors: 380 if err != nil { 381 return err 382 } 383 384 case <-p.quit: 385 return errShardHandlerExiting 386 387 case <-p.router.quit: 388 return ErrRouterShuttingDown 389 390 default: 391 return nil 392 } 393 } 394 } 395 396 // launchOutcome is a type returned from launchShard that indicates whether the 397 // shard was successfully send onto the network. 398 type launchOutcome struct { 399 // err is non-nil if a non-critical error was encountered when trying 400 // to send the shard, and we successfully updated the control tower to 401 // reflect this error. This can be errors like not enough local 402 // balance for the given route etc. 403 err error 404 405 // attempt is the attempt structure as recorded in the database. 406 attempt *channeldb.HTLCAttempt 407 } 408 409 // launchShard creates and sends an HTLC attempt along the given route, 410 // registering it with the control tower before sending it. The lastShard 411 // argument should be true if this shard will consume the remainder of the 412 // amount to send. It returns the HTLCAttemptInfo that was created for the 413 // shard, along with a launchOutcome. The launchOutcome is used to indicate 414 // whether the attempt was successfully sent. If the launchOutcome wraps a 415 // non-nil error, it means that the attempt was not sent onto the network, so 416 // no result will be available in the future for it. 417 func (p *shardHandler) launchShard(rt *route.Route, 418 lastShard bool) (*channeldb.HTLCAttemptInfo, *launchOutcome, error) { 419 420 // Using the route received from the payment session, create a new 421 // shard to send. 422 firstHop, htlcAdd, attempt, err := p.createNewPaymentAttempt( 423 rt, lastShard, 424 ) 425 if err != nil { 426 return nil, nil, err 427 } 428 429 // Before sending this HTLC to the switch, we checkpoint the fresh 430 // paymentID and route to the DB. This lets us know on startup the ID 431 // of the payment that we attempted to send, such that we can query the 432 // Switch for its whereabouts. The route is needed to handle the result 433 // when it eventually comes back. 434 err = p.router.cfg.Control.RegisterAttempt(p.identifier, attempt) 435 if err != nil { 436 return nil, nil, err 437 } 438 439 // Now that the attempt is created and checkpointed to the DB, we send 440 // it. 441 sendErr := p.sendPaymentAttempt(attempt, firstHop, htlcAdd) 442 if sendErr != nil { 443 // TODO(joostjager): Distinguish unexpected internal errors 444 // from real send errors. 445 htlcAttempt, err := p.failAttempt(attempt, sendErr) 446 if err != nil { 447 return nil, nil, err 448 } 449 450 // Return a launchOutcome indicating the shard failed. 451 return attempt, &launchOutcome{ 452 attempt: htlcAttempt, 453 err: sendErr, 454 }, nil 455 } 456 457 return attempt, &launchOutcome{}, nil 458 } 459 460 // shardResult holds the resulting outcome of a shard sent. 461 type shardResult struct { 462 // attempt is the attempt structure as recorded in the database. 463 attempt *channeldb.HTLCAttempt 464 465 // err indicates that the shard failed. 466 err error 467 } 468 469 // collectResultAsync launches a goroutine that will wait for the result of the 470 // given HTLC attempt to be available then handle its result. It will fail the 471 // payment with the control tower if a terminal error is encountered. 472 func (p *shardHandler) collectResultAsync(attempt *channeldb.HTLCAttemptInfo) { 473 474 // errToSend is the error to be sent to sh.shardErrors. 475 var errToSend error 476 477 // handleResultErr is a function closure must be called using defer. It 478 // finishes collecting result by updating the payment state and send 479 // the error (or nil) to sh.shardErrors. 480 handleResultErr := func() { 481 // Send the error or quit. 482 select { 483 case p.shardErrors <- errToSend: 484 case <-p.router.quit: 485 case <-p.quit: 486 } 487 488 p.wg.Done() 489 } 490 491 p.wg.Add(1) 492 go func() { 493 defer handleResultErr() 494 495 // Block until the result is available. 496 result, err := p.collectResult(attempt) 497 if err != nil { 498 if err != ErrRouterShuttingDown && 499 err != htlcswitch.ErrSwitchExiting && 500 err != errShardHandlerExiting { 501 502 log.Errorf("Error collecting result for "+ 503 "shard %v for payment %v: %v", 504 attempt.AttemptID, p.identifier, err) 505 } 506 507 // Overwrite the param errToSend and return so that the 508 // defer function will use the param to proceed. 509 errToSend = err 510 return 511 } 512 513 // If a non-critical error was encountered handle it and mark 514 // the payment failed if the failure was terminal. 515 if result.err != nil { 516 // Overwrite the param errToSend and return so that the 517 // defer function will use the param to proceed. Notice 518 // that the errToSend could be nil here. 519 errToSend = p.handleSendError(attempt, result.err) 520 return 521 } 522 }() 523 } 524 525 // collectResult waits for the result for the given attempt to be available 526 // from the Switch, then records the attempt outcome with the control tower. A 527 // shardResult is returned, indicating the final outcome of this HTLC attempt. 528 func (p *shardHandler) collectResult(attempt *channeldb.HTLCAttemptInfo) ( 529 *shardResult, error) { 530 531 // We'll retrieve the hash specific to this shard from the 532 // shardTracker, since it will be needed to regenerate the circuit 533 // below. 534 hash, err := p.shardTracker.GetHash(attempt.AttemptID) 535 if err != nil { 536 return nil, err 537 } 538 539 // Regenerate the circuit for this attempt. 540 _, circuit, err := generateSphinxPacket( 541 &attempt.Route, hash[:], attempt.SessionKey(), 542 ) 543 if err != nil { 544 return nil, err 545 } 546 547 // Using the created circuit, initialize the error decrypter so we can 548 // parse+decode any failures incurred by this payment within the 549 // switch. 550 errorDecryptor := &htlcswitch.SphinxErrorDecrypter{ 551 OnionErrorDecrypter: sphinx.NewOnionErrorDecrypter(circuit), 552 } 553 554 // Now ask the switch to return the result of the payment when 555 // available. 556 resultChan, err := p.router.cfg.Payer.GetPaymentResult( 557 attempt.AttemptID, p.identifier, errorDecryptor, 558 ) 559 switch { 560 561 // If this attempt ID is unknown to the Switch, it means it was never 562 // checkpointed and forwarded by the switch before a restart. In this 563 // case we can safely send a new payment attempt, and wait for its 564 // result to be available. 565 case err == htlcswitch.ErrPaymentIDNotFound: 566 log.Debugf("Attempt ID %v for payment %v not found in "+ 567 "the Switch, retrying.", attempt.AttemptID, 568 p.identifier) 569 570 attempt, cErr := p.failAttempt(attempt, err) 571 if cErr != nil { 572 return nil, cErr 573 } 574 575 return &shardResult{ 576 attempt: attempt, 577 err: err, 578 }, nil 579 580 // A critical, unexpected error was encountered. 581 case err != nil: 582 log.Errorf("Failed getting result for attemptID %d "+ 583 "from switch: %v", attempt.AttemptID, err) 584 585 return nil, err 586 } 587 588 // The switch knows about this payment, we'll wait for a result to be 589 // available. 590 var ( 591 result *htlcswitch.PaymentResult 592 ok bool 593 ) 594 595 select { 596 case result, ok = <-resultChan: 597 if !ok { 598 return nil, htlcswitch.ErrSwitchExiting 599 } 600 601 case <-p.router.quit: 602 return nil, ErrRouterShuttingDown 603 } 604 605 // In case of a payment failure, fail the attempt with the control 606 // tower and return. 607 if result.Error != nil { 608 attempt, err := p.failAttempt(attempt, result.Error) 609 if err != nil { 610 return nil, err 611 } 612 613 return &shardResult{ 614 attempt: attempt, 615 err: result.Error, 616 }, nil 617 } 618 619 // We successfully got a payment result back from the switch. 620 log.Debugf("Payment %v succeeded with pid=%v", 621 p.identifier, attempt.AttemptID) 622 623 // Report success to mission control. 624 err = p.router.cfg.MissionControl.ReportPaymentSuccess( 625 attempt.AttemptID, &attempt.Route, 626 ) 627 if err != nil { 628 log.Errorf("Error reporting payment success to mc: %v", 629 err) 630 } 631 632 // In case of success we atomically store settle result to the DB move 633 // the shard to the settled state. 634 htlcAttempt, err := p.router.cfg.Control.SettleAttempt( 635 p.identifier, attempt.AttemptID, 636 &channeldb.HTLCSettleInfo{ 637 Preimage: result.Preimage, 638 SettleTime: p.router.cfg.Clock.Now(), 639 }, 640 ) 641 if err != nil { 642 log.Errorf("Unable to succeed payment attempt: %v", err) 643 return nil, err 644 } 645 646 return &shardResult{ 647 attempt: htlcAttempt, 648 }, nil 649 } 650 651 // createNewPaymentAttempt creates a new payment attempt from the given route. 652 func (p *shardHandler) createNewPaymentAttempt(rt *route.Route, lastShard bool) ( 653 lnwire.ShortChannelID, *lnwire.UpdateAddHTLC, 654 *channeldb.HTLCAttemptInfo, error) { 655 656 // Generate a new key to be used for this attempt. 657 sessionKey, err := generateNewSessionKey() 658 if err != nil { 659 return lnwire.ShortChannelID{}, nil, nil, err 660 } 661 662 // We generate a new, unique payment ID that we will use for 663 // this HTLC. 664 attemptID, err := p.router.cfg.NextPaymentID() 665 if err != nil { 666 return lnwire.ShortChannelID{}, nil, nil, err 667 } 668 669 // Requesst a new shard from the ShardTracker. If this is an AMP 670 // payment, and this is the last shard, the outstanding shards together 671 // with ths one will be enough for the receiver to derive all HTLC 672 // preimages. If this a non-AMP payment, the ShardTracker will return a 673 // simple shard with the payment's static payment hash. 674 shard, err := p.shardTracker.NewShard(attemptID, lastShard) 675 if err != nil { 676 return lnwire.ShortChannelID{}, nil, nil, err 677 } 678 679 // It this shard carries MPP or AMP options, add them to the last hop 680 // on the route. 681 hop := rt.Hops[len(rt.Hops)-1] 682 if shard.MPP() != nil { 683 hop.MPP = shard.MPP() 684 } 685 686 if shard.AMP() != nil { 687 hop.AMP = shard.AMP() 688 } 689 690 // Generate the raw encoded sphinx packet to be included along 691 // with the htlcAdd message that we send directly to the 692 // switch. 693 hash := shard.Hash() 694 onionBlob, _, err := generateSphinxPacket(rt, hash[:], sessionKey) 695 if err != nil { 696 return lnwire.ShortChannelID{}, nil, nil, err 697 } 698 699 // Craft an HTLC packet to send to the layer 2 switch. The 700 // metadata within this packet will be used to route the 701 // payment through the network, starting with the first-hop. 702 htlcAdd := &lnwire.UpdateAddHTLC{ 703 Amount: rt.TotalAmount, 704 Expiry: rt.TotalTimeLock, 705 PaymentHash: hash, 706 } 707 copy(htlcAdd.OnionBlob[:], onionBlob) 708 709 // Attempt to send this payment through the network to complete 710 // the payment. If this attempt fails, then we'll continue on 711 // to the next available route. 712 firstHop := lnwire.NewShortChanIDFromInt( 713 rt.Hops[0].ChannelID, 714 ) 715 716 // We now have all the information needed to populate the current 717 // attempt information. 718 attempt := channeldb.NewHtlcAttemptInfo( 719 attemptID, sessionKey, *rt, p.router.cfg.Clock.Now(), &hash, 720 ) 721 722 return firstHop, htlcAdd, attempt, nil 723 } 724 725 // sendPaymentAttempt attempts to send the current attempt to the switch. 726 func (p *shardHandler) sendPaymentAttempt( 727 attempt *channeldb.HTLCAttemptInfo, firstHop lnwire.ShortChannelID, 728 htlcAdd *lnwire.UpdateAddHTLC) error { 729 730 log.Tracef("Attempting to send payment %v (pid=%v), "+ 731 "using route: %v", p.identifier, attempt.AttemptID, 732 newLogClosure(func() string { 733 return spew.Sdump(attempt.Route) 734 }), 735 ) 736 737 // Send it to the Switch. When this method returns we assume 738 // the Switch successfully has persisted the payment attempt, 739 // such that we can resume waiting for the result after a 740 // restart. 741 err := p.router.cfg.Payer.SendHTLC( 742 firstHop, attempt.AttemptID, htlcAdd, 743 ) 744 if err != nil { 745 log.Errorf("Failed sending attempt %d for payment "+ 746 "%v to switch: %v", attempt.AttemptID, 747 p.identifier, err) 748 return err 749 } 750 751 log.Debugf("Payment %v (pid=%v) successfully sent to switch, route: %v", 752 p.identifier, attempt.AttemptID, &attempt.Route) 753 754 return nil 755 } 756 757 // handleSendError inspects the given error from the Switch and determines 758 // whether we should make another payment attempt, or if it should be 759 // considered a terminal error. Terminal errors will be recorded with the 760 // control tower. It analyzes the sendErr for the payment attempt received from 761 // the switch and updates mission control and/or channel policies. Depending on 762 // the error type, the error is either the final outcome of the payment or we 763 // need to continue with an alternative route. A final outcome is indicated by 764 // a non-nil reason value. 765 func (p *shardHandler) handleSendError(attempt *channeldb.HTLCAttemptInfo, 766 sendErr error) error { 767 768 internalErrorReason := channeldb.FailureReasonError 769 770 // failPayment is a helper closure that fails the payment via the 771 // router's control tower, which marks the payment as failed in db. 772 failPayment := func(reason *channeldb.FailureReason, 773 sendErr error) error { 774 775 log.Infof("Payment %v failed: final_outcome=%v, raw_err=%v", 776 p.identifier, *reason, sendErr) 777 778 // Fail the payment via control tower. 779 if err := p.router.cfg.Control.Fail( 780 p.identifier, *reason); err != nil { 781 782 log.Errorf("unable to report failure to control "+ 783 "tower: %v", err) 784 785 return &internalErrorReason 786 } 787 788 return reason 789 } 790 791 // reportFail is a helper closure that reports the failure to the 792 // mission control, which helps us to decide whether we want to retry 793 // the payment or not. If a non nil reason is returned from mission 794 // control, it will further fail the payment via control tower. 795 reportFail := func(srcIdx *int, msg lnwire.FailureMessage) error { 796 // Report outcome to mission control. 797 reason, err := p.router.cfg.MissionControl.ReportPaymentFail( 798 attempt.AttemptID, &attempt.Route, srcIdx, msg, 799 ) 800 if err != nil { 801 log.Errorf("Error reporting payment result to mc: %v", 802 err) 803 804 reason = &internalErrorReason 805 } 806 807 // Exit early if there's no reason. 808 if reason == nil { 809 return nil 810 } 811 812 return failPayment(reason, sendErr) 813 } 814 815 if sendErr == htlcswitch.ErrUnreadableFailureMessage { 816 log.Tracef("Unreadable failure when sending htlc") 817 818 return reportFail(nil, nil) 819 } 820 821 // If the error is a ClearTextError, we have received a valid wire 822 // failure message, either from our own outgoing link or from a node 823 // down the route. If the error is not related to the propagation of 824 // our payment, we can stop trying because an internal error has 825 // occurred. 826 rtErr, ok := sendErr.(htlcswitch.ClearTextError) 827 if !ok { 828 return failPayment(&internalErrorReason, sendErr) 829 } 830 831 // failureSourceIdx is the index of the node that the failure occurred 832 // at. If the ClearTextError received is not a ForwardingError the 833 // payment error occurred at our node, so we leave this value as 0 834 // to indicate that the failure occurred locally. If the error is a 835 // ForwardingError, it did not originate at our node, so we set 836 // failureSourceIdx to the index of the node where the failure occurred. 837 failureSourceIdx := 0 838 source, ok := rtErr.(*htlcswitch.ForwardingError) 839 if ok { 840 failureSourceIdx = source.FailureSourceIdx 841 } 842 843 // Extract the wire failure and apply channel update if it contains one. 844 // If we received an unknown failure message from a node along the 845 // route, the failure message will be nil. 846 failureMessage := rtErr.WireMessage() 847 err := p.handleFailureMessage( 848 &attempt.Route, failureSourceIdx, failureMessage, 849 ) 850 if err != nil { 851 return failPayment(&internalErrorReason, sendErr) 852 } 853 854 log.Tracef("Node=%v reported failure when sending htlc", 855 failureSourceIdx) 856 857 return reportFail(&failureSourceIdx, failureMessage) 858 } 859 860 // handleFailureMessage tries to apply a channel update present in the failure 861 // message if any. 862 func (p *shardHandler) handleFailureMessage(rt *route.Route, 863 errorSourceIdx int, failure lnwire.FailureMessage) error { 864 865 if failure == nil { 866 return nil 867 } 868 869 // It makes no sense to apply our own channel updates. 870 if errorSourceIdx == 0 { 871 log.Errorf("Channel update of ourselves received") 872 873 return nil 874 } 875 876 // Extract channel update if the error contains one. 877 update := p.router.extractChannelUpdate(failure) 878 if update == nil { 879 return nil 880 } 881 882 // Parse pubkey to allow validation of the channel update. This should 883 // always succeed, otherwise there is something wrong in our 884 // implementation. Therefore return an error. 885 errVertex := rt.Hops[errorSourceIdx-1].PubKeyBytes 886 errSource, err := secp256k1.ParsePubKey( 887 errVertex[:], 888 ) 889 if err != nil { 890 log.Errorf("Cannot parse pubkey: idx=%v, pubkey=%v", 891 errorSourceIdx, errVertex) 892 893 return err 894 } 895 896 var ( 897 isAdditionalEdge bool 898 policy *channeldb.CachedEdgePolicy 899 ) 900 901 // Before we apply the channel update, we need to decide whether the 902 // update is for additional (ephemeral) edge or normal edge stored in 903 // db. 904 // 905 // Note: the p.paySession might be nil here if it's called inside 906 // SendToRoute where there's no payment lifecycle. 907 if p.paySession != nil { 908 policy = p.paySession.GetAdditionalEdgePolicy( 909 errSource, update.ShortChannelID.ToUint64(), 910 ) 911 if policy != nil { 912 isAdditionalEdge = true 913 } 914 } 915 916 // Apply channel update to additional edge policy. 917 if isAdditionalEdge { 918 if !p.paySession.UpdateAdditionalEdge( 919 update, errSource, policy) { 920 921 log.Debugf("Invalid channel update received: node=%v", 922 errVertex) 923 } 924 return nil 925 } 926 927 // Apply channel update to the channel edge policy in our db. 928 if !p.router.applyChannelUpdate(update, errSource) { 929 log.Debugf("Invalid channel update received: node=%v", 930 errVertex) 931 } 932 return nil 933 } 934 935 // failAttempt calls control tower to fail the current payment attempt. 936 func (p *shardHandler) failAttempt(attempt *channeldb.HTLCAttemptInfo, 937 sendError error) (*channeldb.HTLCAttempt, error) { 938 939 log.Warnf("Attempt %v for payment %v failed: %v", attempt.AttemptID, 940 p.identifier, sendError) 941 942 failInfo := marshallError( 943 sendError, 944 p.router.cfg.Clock.Now(), 945 ) 946 947 // Now that we are failing this payment attempt, cancel the shard with 948 // the ShardTracker such that it can derive the correct hash for the 949 // next attempt. 950 if err := p.shardTracker.CancelShard(attempt.AttemptID); err != nil { 951 return nil, err 952 } 953 954 return p.router.cfg.Control.FailAttempt( 955 p.identifier, attempt.AttemptID, 956 failInfo, 957 ) 958 } 959 960 // marshallError marshall an error as received from the switch to a structure 961 // that is suitable for database storage. 962 func marshallError(sendError error, time time.Time) *channeldb.HTLCFailInfo { 963 response := &channeldb.HTLCFailInfo{ 964 FailTime: time, 965 } 966 967 switch sendError { 968 969 case htlcswitch.ErrPaymentIDNotFound: 970 response.Reason = channeldb.HTLCFailInternal 971 return response 972 973 case htlcswitch.ErrUnreadableFailureMessage: 974 response.Reason = channeldb.HTLCFailUnreadable 975 return response 976 } 977 978 rtErr, ok := sendError.(htlcswitch.ClearTextError) 979 if !ok { 980 response.Reason = channeldb.HTLCFailInternal 981 return response 982 } 983 984 message := rtErr.WireMessage() 985 if message != nil { 986 response.Reason = channeldb.HTLCFailMessage 987 response.Message = message 988 } else { 989 response.Reason = channeldb.HTLCFailUnknown 990 } 991 992 // If the ClearTextError received is a ForwardingError, the error 993 // originated from a node along the route, not locally on our outgoing 994 // link. We set failureSourceIdx to the index of the node where the 995 // failure occurred. If the error is not a ForwardingError, the failure 996 // occurred at our node, so we leave the index as 0 to indicate that 997 // we failed locally. 998 fErr, ok := rtErr.(*htlcswitch.ForwardingError) 999 if ok { 1000 response.FailureSourceIndex = uint32(fErr.FailureSourceIdx) 1001 } 1002 1003 return response 1004 }