github.com/decred/dcrlnd@v0.7.6/routing/payment_lifecycle.go

github.com/decred/dcrlnd@v0.7.6/routing/payment_lifecycle.go (about)

     1  package routing
     2  
     3  import (
     4  	"fmt"
     5  	"sync"
     6  	"time"
     7  
     8  	"github.com/davecgh/go-spew/spew"
     9  	"github.com/decred/dcrd/dcrec/secp256k1/v4"
    10  	"github.com/decred/dcrlnd/channeldb"
    11  	"github.com/decred/dcrlnd/htlcswitch"
    12  	"github.com/decred/dcrlnd/lntypes"
    13  	"github.com/decred/dcrlnd/lnwire"
    14  	"github.com/decred/dcrlnd/routing/route"
    15  	"github.com/decred/dcrlnd/routing/shards"
    16  	sphinx "github.com/decred/lightning-onion/v4"
    17  )
    18  
// errShardHandlerExiting is the sentinel error returned by the shardHandler's
// waitForShard and checkShards methods once the handler's quit channel has
// been closed.
var errShardHandlerExiting = fmt.Errorf("shard handler exiting")
    21  
// paymentLifecycle holds all information about the current state of a payment
// needed to resume it from any point.
type paymentLifecycle struct {
	// router gives access to the control tower (cfg.Control) and to the
	// router-wide quit channel.
	router *ChannelRouter

	// totalAmount is the full amount to deliver; the amount already sent
	// is subtracted from it to obtain the remaining amount.
	totalAmount lnwire.MilliAtom

	// feeLimit is the total fee budget for the payment. Fees of already
	// sent shards are deducted from it when computing the payment state.
	feeLimit lnwire.MilliAtom

	// identifier is the hash used to look the payment up in the control
	// tower.
	identifier lntypes.Hash

	// paySession is queried for a new route every time a shard is about
	// to be launched.
	paySession PaymentSession

	// shardTracker is handed to the shardHandler created by resumePayment.
	shardTracker shards.ShardTracker

	// timeoutChan fires when the payment attempt timed out. It is nil if
	// no timeout is applicable.
	timeoutChan <-chan time.Time

	// currentHeight is the block height passed to the payment session
	// when requesting a route.
	currentHeight int32
}
    34  
// paymentState holds a number of key insights learned from a given MPPayment
// that we use to determine what to do on each payment loop iteration.
type paymentState struct {
	// numShardsInFlight is the number of HTLC attempts that are still in
	// flight.
	numShardsInFlight int

	// remainingAmt is the payment's total amount minus the amount already
	// sent in settled and in-flight shards.
	remainingAmt lnwire.MilliAtom

	// remainingFees is what is left of the fee limit after deducting the
	// fees of the shards already sent. It is clamped at zero.
	remainingFees lnwire.MilliAtom

	// terminate indicates the payment is in its final stage and no more
	// shards should be launched. This value is true if we have an HTLC
	// settled or the payment has an error.
	terminate bool
}
    47  
    48  // terminated returns a bool to indicate there are no further actions needed
    49  // and we should return what we have, either the payment preimage or the
    50  // payment error.
    51  func (ps paymentState) terminated() bool {
    52  	// If the payment is in final stage and we have no in flight shards to
    53  	// wait result for, we consider the whole action terminated.
    54  	return ps.terminate && ps.numShardsInFlight == 0
    55  }
    56  
    57  // needWaitForShards returns a bool to specify whether we need to wait for the
    58  // outcome of the shanrdHandler.
    59  func (ps paymentState) needWaitForShards() bool {
    60  	// If we have in flight shards and the payment is in final stage, we
    61  	// need to wait for the outcomes from the shards. Or if we have no more
    62  	// money to be sent, we need to wait for the already launched shards.
    63  	if ps.numShardsInFlight == 0 {
    64  		return false
    65  	}
    66  	return ps.terminate || ps.remainingAmt == 0
    67  }
    68  
    69  // fetchPaymentState will query the db for the latest payment state
    70  // information we need to act on every iteration of the payment loop and update
    71  // the paymentState.
    72  func (p *paymentLifecycle) fetchPaymentState() (*channeldb.MPPayment,
    73  	*paymentState, error) {
    74  
    75  	// Fetch the latest payment from db.
    76  	payment, err := p.router.cfg.Control.FetchPayment(p.identifier)
    77  	if err != nil {
    78  		return nil, nil, err
    79  	}
    80  
    81  	// Fetch the total amount and fees that has already been sent in
    82  	// settled and still in-flight shards.
    83  	sentAmt, fees := payment.SentAmt()
    84  
    85  	// Sanity check we haven't sent a value larger than the payment amount.
    86  	if sentAmt > p.totalAmount {
    87  		return nil, nil, fmt.Errorf("amount sent %v exceeds "+
    88  			"total amount %v", sentAmt, p.totalAmount)
    89  	}
    90  
    91  	// We'll subtract the used fee from our fee budget, but allow the fees
    92  	// of the already sent shards to exceed our budget (can happen after
    93  	// restarts).
    94  	feeBudget := p.feeLimit
    95  	if fees <= feeBudget {
    96  		feeBudget -= fees
    97  	} else {
    98  		feeBudget = 0
    99  	}
   100  
   101  	// Get any terminal info for this payment.
   102  	settle, failure := payment.TerminalInfo()
   103  
   104  	// If either an HTLC settled, or the payment has a payment level
   105  	// failure recorded, it means we should terminate the moment all shards
   106  	// have returned with a result.
   107  	terminate := settle != nil || failure != nil
   108  
   109  	// Update the payment state.
   110  	state := &paymentState{
   111  		numShardsInFlight: len(payment.InFlightHTLCs()),
   112  		remainingAmt:      p.totalAmount - sentAmt,
   113  		remainingFees:     feeBudget,
   114  		terminate:         terminate,
   115  	}
   116  
   117  	return payment, state, nil
   118  }
   119  
// resumePayment resumes the paymentLifecycle from the current state.
//
// It first spins up result collectors for every HTLC attempt that is still in
// flight, then loops: drain available shard outcomes, re-read the payment
// state from the control tower, and either return (terminal state with no
// shards left), wait for a shard outcome, or request a route and launch a new
// shard.
//
// On success it returns the preimage and the route of the first settled HTLC
// found. On failure it returns a zero preimage, a nil route and either the
// recorded payment failure reason or the error that aborted the loop.
func (p *paymentLifecycle) resumePayment() ([32]byte, *route.Route, error) {
	shardHandler := &shardHandler{
		router:       p.router,
		identifier:   p.identifier,
		shardTracker: p.shardTracker,
		shardErrors:  make(chan error),
		quit:         make(chan struct{}),
		paySession:   p.paySession,
	}

	// When the payment lifecycle loop exits, we make sure to signal any
	// sub goroutine of the shardHandler to exit, then wait for them to
	// return.
	defer shardHandler.stop()

	// If we had any existing attempts outstanding, we'll start by spinning
	// up goroutines that'll collect their results and deliver them to the
	// lifecycle loop below.
	payment, _, err := p.fetchPaymentState()
	if err != nil {
		return [32]byte{}, nil, err
	}

	for _, a := range payment.InFlightHTLCs() {
		// Copy the loop variable so the pointer handed to the collector
		// goroutine below does not alias later iterations.
		a := a

		log.Infof("Resuming payment shard %v for payment %v",
			a.AttemptID, p.identifier)

		shardHandler.collectResultAsync(&a.HTLCAttemptInfo)
	}

	// We'll continue until either our payment succeeds, or we encounter a
	// critical error during path finding.
lifecycle:
	for {
		// Start by quickly checking if there are any outcomes already
		// available to handle before we reevaluate our state.
		if err := shardHandler.checkShards(); err != nil {
			return [32]byte{}, nil, err
		}

		// We update the payment state on every iteration. Since the
		// payment state is affected by multiple goroutines (ie,
		// collectResultAsync), it is NOT guaranteed that we always
		// have the latest state here. This is fine as long as the
		// state is consistent as a whole.
		payment, currentState, err := p.fetchPaymentState()
		if err != nil {
			return [32]byte{}, nil, err
		}

		log.Debugf("Payment %v in state terminate=%v, "+
			"active_shards=%v, rem_value=%v, fee_limit=%v",
			p.identifier, currentState.terminate,
			currentState.numShardsInFlight,
			currentState.remainingAmt, currentState.remainingFees,
		)

		// TODO(yy): sanity check all the states to make sure
		// everything is expected.
		switch {

		// We have a terminal condition and no active shards, we are
		// ready to exit.
		case currentState.terminated():
			// Find the first successful shard and return
			// the preimage and route.
			for _, a := range payment.HTLCs {
				if a.Settle != nil {
					return a.Settle.Preimage, &a.Route, nil
				}
			}

			// Payment failed.
			// NOTE(review): this dereference relies on
			// FailureReason being set whenever the payment is
			// terminal without a settled HTLC — confirm against
			// MPPayment.TerminalInfo.
			return [32]byte{}, nil, *payment.FailureReason

		// If we either reached a terminal error condition (but had
		// active shards still) or there is no remaining value to send,
		// we'll wait for a shard outcome.
		case currentState.needWaitForShards():
			// We still have outstanding shards, so wait for a new
			// outcome to be available before re-evaluating our
			// state.
			if err := shardHandler.waitForShard(); err != nil {
				return [32]byte{}, nil, err
			}
			continue lifecycle
		}

		// Before we attempt any new shard, we'll check to see if
		// either we've gone past the payment attempt timeout, or the
		// router is exiting. In either case, we'll stop this payment
		// attempt short. If a timeout is not applicable, timeoutChan
		// will be nil.
		select {
		case <-p.timeoutChan:
			log.Warnf("payment attempt not completed before " +
				"timeout")

			// By marking the payment failed with the control
			// tower, no further shards will be launched and we'll
			// return with an error the moment all active shards
			// have finished.
			saveErr := p.router.cfg.Control.Fail(
				p.identifier, channeldb.FailureReasonTimeout,
			)
			if saveErr != nil {
				return [32]byte{}, nil, saveErr
			}

			continue lifecycle

		case <-p.router.quit:
			return [32]byte{}, nil, ErrRouterShuttingDown

		// Fall through if we haven't hit our time limit.
		default:
		}

		// Create a new payment attempt from the given payment session.
		rt, err := p.paySession.RequestRoute(
			currentState.remainingAmt, currentState.remainingFees,
			uint32(currentState.numShardsInFlight),
			uint32(p.currentHeight),
		)
		if err != nil {
			log.Warnf("Failed to find route for payment %v: %v",
				p.identifier, err)

			// Only a noRouteError is recoverable; any other error
			// from the payment session aborts the payment loop.
			routeErr, ok := err.(noRouteError)
			if !ok {
				return [32]byte{}, nil, err
			}

			// There is no route to try, and we have no active
			// shards. This means that there is no way for us to
			// send the payment, so mark it failed with no route.
			if currentState.numShardsInFlight == 0 {
				failureCode := routeErr.FailureReason()
				log.Debugf("Marking payment %v permanently "+
					"failed with no route: %v",
					p.identifier, failureCode)

				saveErr := p.router.cfg.Control.Fail(
					p.identifier, failureCode,
				)
				if saveErr != nil {
					return [32]byte{}, nil, saveErr
				}

				continue lifecycle
			}

			// We still have active shards, we'll wait for an
			// outcome to be available before retrying.
			if err := shardHandler.waitForShard(); err != nil {
				return [32]byte{}, nil, err
			}
			continue lifecycle
		}

		// If this route will consume the last remaining amount to send
		// to the receiver, this will be our last shard (for now).
		lastShard := rt.ReceiverAmt() == currentState.remainingAmt

		// We found a route to try, launch a new shard.
		attempt, outcome, err := shardHandler.launchShard(rt, lastShard)
		switch {
		// We may get a terminal error if we've processed a shard with
		// a terminal state (settled or permanent failure), while we
		// were pathfinding. We know we're in a terminal state here,
		// so we can continue and wait for our last shards to return.
		case err == channeldb.ErrPaymentTerminal:
			log.Infof("Payment %v in terminal state, abandoning "+
				"shard", p.identifier)

			continue lifecycle

		case err != nil:
			return [32]byte{}, nil, err
		}

		// If we encountered a non-critical error when launching the
		// shard, handle it.
		if outcome.err != nil {
			log.Warnf("Failed to launch shard %v for "+
				"payment %v: %v", attempt.AttemptID,
				p.identifier, outcome.err)

			// We must inspect the error to know whether it was
			// critical or not, to decide whether we should
			// continue trying.
			err := shardHandler.handleSendError(
				attempt, outcome.err,
			)
			if err != nil {
				return [32]byte{}, nil, err
			}

			// Error was handled successfully, continue to make a
			// new attempt.
			continue lifecycle
		}

		// Now that the shard was successfully sent, launch a go
		// routine that will handle its result when its back.
		shardHandler.collectResultAsync(attempt)

	}
}
   332  
// shardHandler holds what is necessary to send and collect the result of
// shards.
type shardHandler struct {
	// identifier is the hash of the payment the shards belong to.
	identifier lntypes.Hash

	// router gives access to the control tower, the switch (Payer),
	// mission control and the router-wide quit channel.
	router *ChannelRouter

	// shardTracker creates new shards and returns the hash of existing
	// ones.
	shardTracker shards.ShardTracker

	// paySession is the payment session this handler was created for.
	paySession PaymentSession

	// shardErrors is a channel where errors collected by calling
	// collectResultAsync will be delivered. These results are meant to be
	// inspected by calling waitForShard or checkShards, and the channel
	// doesn't need to be initiated if the caller is using the sync
	// collectResult directly.
	shardErrors chan error

	// quit is closed to signal the sub goroutines of the payment lifecycle
	// to stop.
	quit chan struct{}

	// wg tracks the goroutines started by collectResultAsync so that stop
	// can wait for them.
	wg sync.WaitGroup
}
   353  
// stop signals any active shard goroutine to exit and waits for them to exit.
// It closes the quit channel, so it must be called at most once per handler:
// closing an already closed channel panics.
func (p *shardHandler) stop() {
	close(p.quit)

	// Block until every goroutine registered through collectResultAsync
	// has called Done.
	p.wg.Wait()
}
   359  
   360  // waitForShard blocks until any of the outstanding shards return.
   361  func (p *shardHandler) waitForShard() error {
   362  	select {
   363  	case err := <-p.shardErrors:
   364  		return err
   365  
   366  	case <-p.quit:
   367  		return errShardHandlerExiting
   368  
   369  	case <-p.router.quit:
   370  		return ErrRouterShuttingDown
   371  	}
   372  }
   373  
   374  // checkShards is a non-blocking method that check if any shards has finished
   375  // their execution.
   376  func (p *shardHandler) checkShards() error {
   377  	for {
   378  		select {
   379  		case err := <-p.shardErrors:
   380  			if err != nil {
   381  				return err
   382  			}
   383  
   384  		case <-p.quit:
   385  			return errShardHandlerExiting
   386  
   387  		case <-p.router.quit:
   388  			return ErrRouterShuttingDown
   389  
   390  		default:
   391  			return nil
   392  		}
   393  	}
   394  }
   395  
// launchOutcome is a type returned from launchShard that indicates whether the
// shard was successfully sent onto the network.
type launchOutcome struct {
	// err is non-nil if a non-critical error was encountered when trying
	// to send the shard, and we successfully updated the control tower to
	// reflect this error. This can be errors like not enough local
	// balance for the given route etc.
	err error

	// attempt is the attempt structure as recorded in the database. It is
	// only populated by launchShard when the send failed.
	attempt *channeldb.HTLCAttempt
}
   408  
   409  // launchShard creates and sends an HTLC attempt along the given route,
   410  // registering it with the control tower before sending it. The lastShard
   411  // argument should be true if this shard will consume the remainder of the
   412  // amount to send. It returns the HTLCAttemptInfo that was created for the
   413  // shard, along with a launchOutcome.  The launchOutcome is used to indicate
   414  // whether the attempt was successfully sent. If the launchOutcome wraps a
   415  // non-nil error, it means that the attempt was not sent onto the network, so
   416  // no result will be available in the future for it.
   417  func (p *shardHandler) launchShard(rt *route.Route,
   418  	lastShard bool) (*channeldb.HTLCAttemptInfo, *launchOutcome, error) {
   419  
   420  	// Using the route received from the payment session, create a new
   421  	// shard to send.
   422  	firstHop, htlcAdd, attempt, err := p.createNewPaymentAttempt(
   423  		rt, lastShard,
   424  	)
   425  	if err != nil {
   426  		return nil, nil, err
   427  	}
   428  
   429  	// Before sending this HTLC to the switch, we checkpoint the fresh
   430  	// paymentID and route to the DB. This lets us know on startup the ID
   431  	// of the payment that we attempted to send, such that we can query the
   432  	// Switch for its whereabouts. The route is needed to handle the result
   433  	// when it eventually comes back.
   434  	err = p.router.cfg.Control.RegisterAttempt(p.identifier, attempt)
   435  	if err != nil {
   436  		return nil, nil, err
   437  	}
   438  
   439  	// Now that the attempt is created and checkpointed to the DB, we send
   440  	// it.
   441  	sendErr := p.sendPaymentAttempt(attempt, firstHop, htlcAdd)
   442  	if sendErr != nil {
   443  		// TODO(joostjager): Distinguish unexpected internal errors
   444  		// from real send errors.
   445  		htlcAttempt, err := p.failAttempt(attempt, sendErr)
   446  		if err != nil {
   447  			return nil, nil, err
   448  		}
   449  
   450  		// Return a launchOutcome indicating the shard failed.
   451  		return attempt, &launchOutcome{
   452  			attempt: htlcAttempt,
   453  			err:     sendErr,
   454  		}, nil
   455  	}
   456  
   457  	return attempt, &launchOutcome{}, nil
   458  }
   459  
// shardResult holds the resulting outcome of a shard sent. It is returned by
// collectResult.
type shardResult struct {
	// attempt is the attempt structure as recorded in the database.
	attempt *channeldb.HTLCAttempt

	// err indicates that the shard failed. It is nil for a settled shard.
	err error
}
   468  
   469  // collectResultAsync launches a goroutine that will wait for the result of the
   470  // given HTLC attempt to be available then handle its result. It will fail the
   471  // payment with the control tower if a terminal error is encountered.
   472  func (p *shardHandler) collectResultAsync(attempt *channeldb.HTLCAttemptInfo) {
   473  
   474  	// errToSend is the error to be sent to sh.shardErrors.
   475  	var errToSend error
   476  
   477  	// handleResultErr is a function closure must be called using defer. It
   478  	// finishes collecting result by updating the payment state and send
   479  	// the error (or nil) to sh.shardErrors.
   480  	handleResultErr := func() {
   481  		// Send the error or quit.
   482  		select {
   483  		case p.shardErrors <- errToSend:
   484  		case <-p.router.quit:
   485  		case <-p.quit:
   486  		}
   487  
   488  		p.wg.Done()
   489  	}
   490  
   491  	p.wg.Add(1)
   492  	go func() {
   493  		defer handleResultErr()
   494  
   495  		// Block until the result is available.
   496  		result, err := p.collectResult(attempt)
   497  		if err != nil {
   498  			if err != ErrRouterShuttingDown &&
   499  				err != htlcswitch.ErrSwitchExiting &&
   500  				err != errShardHandlerExiting {
   501  
   502  				log.Errorf("Error collecting result for "+
   503  					"shard %v for payment %v: %v",
   504  					attempt.AttemptID, p.identifier, err)
   505  			}
   506  
   507  			// Overwrite the param errToSend and return so that the
   508  			// defer function will use the param to proceed.
   509  			errToSend = err
   510  			return
   511  		}
   512  
   513  		// If a non-critical error was encountered handle it and mark
   514  		// the payment failed if the failure was terminal.
   515  		if result.err != nil {
   516  			// Overwrite the param errToSend and return so that the
   517  			// defer function will use the param to proceed. Notice
   518  			// that the errToSend could be nil here.
   519  			errToSend = p.handleSendError(attempt, result.err)
   520  			return
   521  		}
   522  	}()
   523  }
   524  
   525  // collectResult waits for the result for the given attempt to be available
   526  // from the Switch, then records the attempt outcome with the control tower. A
   527  // shardResult is returned, indicating the final outcome of this HTLC attempt.
   528  func (p *shardHandler) collectResult(attempt *channeldb.HTLCAttemptInfo) (
   529  	*shardResult, error) {
   530  
   531  	// We'll retrieve the hash specific to this shard from the
   532  	// shardTracker, since it will be needed to regenerate the circuit
   533  	// below.
   534  	hash, err := p.shardTracker.GetHash(attempt.AttemptID)
   535  	if err != nil {
   536  		return nil, err
   537  	}
   538  
   539  	// Regenerate the circuit for this attempt.
   540  	_, circuit, err := generateSphinxPacket(
   541  		&attempt.Route, hash[:], attempt.SessionKey(),
   542  	)
   543  	if err != nil {
   544  		return nil, err
   545  	}
   546  
   547  	// Using the created circuit, initialize the error decrypter so we can
   548  	// parse+decode any failures incurred by this payment within the
   549  	// switch.
   550  	errorDecryptor := &htlcswitch.SphinxErrorDecrypter{
   551  		OnionErrorDecrypter: sphinx.NewOnionErrorDecrypter(circuit),
   552  	}
   553  
   554  	// Now ask the switch to return the result of the payment when
   555  	// available.
   556  	resultChan, err := p.router.cfg.Payer.GetPaymentResult(
   557  		attempt.AttemptID, p.identifier, errorDecryptor,
   558  	)
   559  	switch {
   560  
   561  	// If this attempt ID is unknown to the Switch, it means it was never
   562  	// checkpointed and forwarded by the switch before a restart. In this
   563  	// case we can safely send a new payment attempt, and wait for its
   564  	// result to be available.
   565  	case err == htlcswitch.ErrPaymentIDNotFound:
   566  		log.Debugf("Attempt ID %v for payment %v not found in "+
   567  			"the Switch, retrying.", attempt.AttemptID,
   568  			p.identifier)
   569  
   570  		attempt, cErr := p.failAttempt(attempt, err)
   571  		if cErr != nil {
   572  			return nil, cErr
   573  		}
   574  
   575  		return &shardResult{
   576  			attempt: attempt,
   577  			err:     err,
   578  		}, nil
   579  
   580  	// A critical, unexpected error was encountered.
   581  	case err != nil:
   582  		log.Errorf("Failed getting result for attemptID %d "+
   583  			"from switch: %v", attempt.AttemptID, err)
   584  
   585  		return nil, err
   586  	}
   587  
   588  	// The switch knows about this payment, we'll wait for a result to be
   589  	// available.
   590  	var (
   591  		result *htlcswitch.PaymentResult
   592  		ok     bool
   593  	)
   594  
   595  	select {
   596  	case result, ok = <-resultChan:
   597  		if !ok {
   598  			return nil, htlcswitch.ErrSwitchExiting
   599  		}
   600  
   601  	case <-p.router.quit:
   602  		return nil, ErrRouterShuttingDown
   603  	}
   604  
   605  	// In case of a payment failure, fail the attempt with the control
   606  	// tower and return.
   607  	if result.Error != nil {
   608  		attempt, err := p.failAttempt(attempt, result.Error)
   609  		if err != nil {
   610  			return nil, err
   611  		}
   612  
   613  		return &shardResult{
   614  			attempt: attempt,
   615  			err:     result.Error,
   616  		}, nil
   617  	}
   618  
   619  	// We successfully got a payment result back from the switch.
   620  	log.Debugf("Payment %v succeeded with pid=%v",
   621  		p.identifier, attempt.AttemptID)
   622  
   623  	// Report success to mission control.
   624  	err = p.router.cfg.MissionControl.ReportPaymentSuccess(
   625  		attempt.AttemptID, &attempt.Route,
   626  	)
   627  	if err != nil {
   628  		log.Errorf("Error reporting payment success to mc: %v",
   629  			err)
   630  	}
   631  
   632  	// In case of success we atomically store settle result to the DB move
   633  	// the shard to the settled state.
   634  	htlcAttempt, err := p.router.cfg.Control.SettleAttempt(
   635  		p.identifier, attempt.AttemptID,
   636  		&channeldb.HTLCSettleInfo{
   637  			Preimage:   result.Preimage,
   638  			SettleTime: p.router.cfg.Clock.Now(),
   639  		},
   640  	)
   641  	if err != nil {
   642  		log.Errorf("Unable to succeed payment attempt: %v", err)
   643  		return nil, err
   644  	}
   645  
   646  	return &shardResult{
   647  		attempt: htlcAttempt,
   648  	}, nil
   649  }
   650  
   651  // createNewPaymentAttempt creates a new payment attempt from the given route.
   652  func (p *shardHandler) createNewPaymentAttempt(rt *route.Route, lastShard bool) (
   653  	lnwire.ShortChannelID, *lnwire.UpdateAddHTLC,
   654  	*channeldb.HTLCAttemptInfo, error) {
   655  
   656  	// Generate a new key to be used for this attempt.
   657  	sessionKey, err := generateNewSessionKey()
   658  	if err != nil {
   659  		return lnwire.ShortChannelID{}, nil, nil, err
   660  	}
   661  
   662  	// We generate a new, unique payment ID that we will use for
   663  	// this HTLC.
   664  	attemptID, err := p.router.cfg.NextPaymentID()
   665  	if err != nil {
   666  		return lnwire.ShortChannelID{}, nil, nil, err
   667  	}
   668  
   669  	// Requesst a new shard from the ShardTracker. If this is an AMP
   670  	// payment, and this is the last shard, the outstanding shards together
   671  	// with ths one will be enough for the receiver to derive all HTLC
   672  	// preimages. If this a non-AMP payment, the ShardTracker will return a
   673  	// simple shard with the payment's static payment hash.
   674  	shard, err := p.shardTracker.NewShard(attemptID, lastShard)
   675  	if err != nil {
   676  		return lnwire.ShortChannelID{}, nil, nil, err
   677  	}
   678  
   679  	// It this shard carries MPP or AMP options, add them to the last hop
   680  	// on the route.
   681  	hop := rt.Hops[len(rt.Hops)-1]
   682  	if shard.MPP() != nil {
   683  		hop.MPP = shard.MPP()
   684  	}
   685  
   686  	if shard.AMP() != nil {
   687  		hop.AMP = shard.AMP()
   688  	}
   689  
   690  	// Generate the raw encoded sphinx packet to be included along
   691  	// with the htlcAdd message that we send directly to the
   692  	// switch.
   693  	hash := shard.Hash()
   694  	onionBlob, _, err := generateSphinxPacket(rt, hash[:], sessionKey)
   695  	if err != nil {
   696  		return lnwire.ShortChannelID{}, nil, nil, err
   697  	}
   698  
   699  	// Craft an HTLC packet to send to the layer 2 switch. The
   700  	// metadata within this packet will be used to route the
   701  	// payment through the network, starting with the first-hop.
   702  	htlcAdd := &lnwire.UpdateAddHTLC{
   703  		Amount:      rt.TotalAmount,
   704  		Expiry:      rt.TotalTimeLock,
   705  		PaymentHash: hash,
   706  	}
   707  	copy(htlcAdd.OnionBlob[:], onionBlob)
   708  
   709  	// Attempt to send this payment through the network to complete
   710  	// the payment. If this attempt fails, then we'll continue on
   711  	// to the next available route.
   712  	firstHop := lnwire.NewShortChanIDFromInt(
   713  		rt.Hops[0].ChannelID,
   714  	)
   715  
   716  	// We now have all the information needed to populate the current
   717  	// attempt information.
   718  	attempt := channeldb.NewHtlcAttemptInfo(
   719  		attemptID, sessionKey, *rt, p.router.cfg.Clock.Now(), &hash,
   720  	)
   721  
   722  	return firstHop, htlcAdd, attempt, nil
   723  }
   724  
   725  // sendPaymentAttempt attempts to send the current attempt to the switch.
   726  func (p *shardHandler) sendPaymentAttempt(
   727  	attempt *channeldb.HTLCAttemptInfo, firstHop lnwire.ShortChannelID,
   728  	htlcAdd *lnwire.UpdateAddHTLC) error {
   729  
   730  	log.Tracef("Attempting to send payment %v (pid=%v), "+
   731  		"using route: %v", p.identifier, attempt.AttemptID,
   732  		newLogClosure(func() string {
   733  			return spew.Sdump(attempt.Route)
   734  		}),
   735  	)
   736  
   737  	// Send it to the Switch. When this method returns we assume
   738  	// the Switch successfully has persisted the payment attempt,
   739  	// such that we can resume waiting for the result after a
   740  	// restart.
   741  	err := p.router.cfg.Payer.SendHTLC(
   742  		firstHop, attempt.AttemptID, htlcAdd,
   743  	)
   744  	if err != nil {
   745  		log.Errorf("Failed sending attempt %d for payment "+
   746  			"%v to switch: %v", attempt.AttemptID,
   747  			p.identifier, err)
   748  		return err
   749  	}
   750  
   751  	log.Debugf("Payment %v (pid=%v) successfully sent to switch, route: %v",
   752  		p.identifier, attempt.AttemptID, &attempt.Route)
   753  
   754  	return nil
   755  }
   756  
   757  // handleSendError inspects the given error from the Switch and determines
   758  // whether we should make another payment attempt, or if it should be
   759  // considered a terminal error. Terminal errors will be recorded with the
   760  // control tower. It analyzes the sendErr for the payment attempt received from
   761  // the switch and updates mission control and/or channel policies. Depending on
   762  // the error type, the error is either the final outcome of the payment or we
   763  // need to continue with an alternative route. A final outcome is indicated by
   764  // a non-nil reason value.
   765  func (p *shardHandler) handleSendError(attempt *channeldb.HTLCAttemptInfo,
   766  	sendErr error) error {
   767  
   768  	internalErrorReason := channeldb.FailureReasonError
   769  
   770  	// failPayment is a helper closure that fails the payment via the
   771  	// router's control tower, which marks the payment as failed in db.
   772  	failPayment := func(reason *channeldb.FailureReason,
   773  		sendErr error) error {
   774  
   775  		log.Infof("Payment %v failed: final_outcome=%v, raw_err=%v",
   776  			p.identifier, *reason, sendErr)
   777  
   778  		// Fail the payment via control tower.
   779  		if err := p.router.cfg.Control.Fail(
   780  			p.identifier, *reason); err != nil {
   781  
   782  			log.Errorf("unable to report failure to control "+
   783  				"tower: %v", err)
   784  
   785  			return &internalErrorReason
   786  		}
   787  
   788  		return reason
   789  	}
   790  
   791  	// reportFail is a helper closure that reports the failure to the
   792  	// mission control, which helps us to decide whether we want to retry
   793  	// the payment or not. If a non nil reason is returned from mission
   794  	// control, it will further fail the payment via control tower.
   795  	reportFail := func(srcIdx *int, msg lnwire.FailureMessage) error {
   796  		// Report outcome to mission control.
   797  		reason, err := p.router.cfg.MissionControl.ReportPaymentFail(
   798  			attempt.AttemptID, &attempt.Route, srcIdx, msg,
   799  		)
   800  		if err != nil {
   801  			log.Errorf("Error reporting payment result to mc: %v",
   802  				err)
   803  
   804  			reason = &internalErrorReason
   805  		}
   806  
   807  		// Exit early if there's no reason.
   808  		if reason == nil {
   809  			return nil
   810  		}
   811  
   812  		return failPayment(reason, sendErr)
   813  	}
   814  
   815  	if sendErr == htlcswitch.ErrUnreadableFailureMessage {
   816  		log.Tracef("Unreadable failure when sending htlc")
   817  
   818  		return reportFail(nil, nil)
   819  	}
   820  
   821  	// If the error is a ClearTextError, we have received a valid wire
   822  	// failure message, either from our own outgoing link or from a node
   823  	// down the route. If the error is not related to the propagation of
   824  	// our payment, we can stop trying because an internal error has
   825  	// occurred.
   826  	rtErr, ok := sendErr.(htlcswitch.ClearTextError)
   827  	if !ok {
   828  		return failPayment(&internalErrorReason, sendErr)
   829  	}
   830  
   831  	// failureSourceIdx is the index of the node that the failure occurred
   832  	// at. If the ClearTextError received is not a ForwardingError the
   833  	// payment error occurred at our node, so we leave this value as 0
   834  	// to indicate that the failure occurred locally. If the error is a
   835  	// ForwardingError, it did not originate at our node, so we set
   836  	// failureSourceIdx to the index of the node where the failure occurred.
   837  	failureSourceIdx := 0
   838  	source, ok := rtErr.(*htlcswitch.ForwardingError)
   839  	if ok {
   840  		failureSourceIdx = source.FailureSourceIdx
   841  	}
   842  
   843  	// Extract the wire failure and apply channel update if it contains one.
   844  	// If we received an unknown failure message from a node along the
   845  	// route, the failure message will be nil.
   846  	failureMessage := rtErr.WireMessage()
   847  	err := p.handleFailureMessage(
   848  		&attempt.Route, failureSourceIdx, failureMessage,
   849  	)
   850  	if err != nil {
   851  		return failPayment(&internalErrorReason, sendErr)
   852  	}
   853  
   854  	log.Tracef("Node=%v reported failure when sending htlc",
   855  		failureSourceIdx)
   856  
   857  	return reportFail(&failureSourceIdx, failureMessage)
   858  }
   859  
   860  // handleFailureMessage tries to apply a channel update present in the failure
   861  // message if any.
   862  func (p *shardHandler) handleFailureMessage(rt *route.Route,
   863  	errorSourceIdx int, failure lnwire.FailureMessage) error {
   864  
   865  	if failure == nil {
   866  		return nil
   867  	}
   868  
   869  	// It makes no sense to apply our own channel updates.
   870  	if errorSourceIdx == 0 {
   871  		log.Errorf("Channel update of ourselves received")
   872  
   873  		return nil
   874  	}
   875  
   876  	// Extract channel update if the error contains one.
   877  	update := p.router.extractChannelUpdate(failure)
   878  	if update == nil {
   879  		return nil
   880  	}
   881  
   882  	// Parse pubkey to allow validation of the channel update. This should
   883  	// always succeed, otherwise there is something wrong in our
   884  	// implementation. Therefore return an error.
   885  	errVertex := rt.Hops[errorSourceIdx-1].PubKeyBytes
   886  	errSource, err := secp256k1.ParsePubKey(
   887  		errVertex[:],
   888  	)
   889  	if err != nil {
   890  		log.Errorf("Cannot parse pubkey: idx=%v, pubkey=%v",
   891  			errorSourceIdx, errVertex)
   892  
   893  		return err
   894  	}
   895  
   896  	var (
   897  		isAdditionalEdge bool
   898  		policy           *channeldb.CachedEdgePolicy
   899  	)
   900  
   901  	// Before we apply the channel update, we need to decide whether the
   902  	// update is for additional (ephemeral) edge or normal edge stored in
   903  	// db.
   904  	//
   905  	// Note: the p.paySession might be nil here if it's called inside
   906  	// SendToRoute where there's no payment lifecycle.
   907  	if p.paySession != nil {
   908  		policy = p.paySession.GetAdditionalEdgePolicy(
   909  			errSource, update.ShortChannelID.ToUint64(),
   910  		)
   911  		if policy != nil {
   912  			isAdditionalEdge = true
   913  		}
   914  	}
   915  
   916  	// Apply channel update to additional edge policy.
   917  	if isAdditionalEdge {
   918  		if !p.paySession.UpdateAdditionalEdge(
   919  			update, errSource, policy) {
   920  
   921  			log.Debugf("Invalid channel update received: node=%v",
   922  				errVertex)
   923  		}
   924  		return nil
   925  	}
   926  
   927  	// Apply channel update to the channel edge policy in our db.
   928  	if !p.router.applyChannelUpdate(update, errSource) {
   929  		log.Debugf("Invalid channel update received: node=%v",
   930  			errVertex)
   931  	}
   932  	return nil
   933  }
   934  
   935  // failAttempt calls control tower to fail the current payment attempt.
   936  func (p *shardHandler) failAttempt(attempt *channeldb.HTLCAttemptInfo,
   937  	sendError error) (*channeldb.HTLCAttempt, error) {
   938  
   939  	log.Warnf("Attempt %v for payment %v failed: %v", attempt.AttemptID,
   940  		p.identifier, sendError)
   941  
   942  	failInfo := marshallError(
   943  		sendError,
   944  		p.router.cfg.Clock.Now(),
   945  	)
   946  
   947  	// Now that we are failing this payment attempt, cancel the shard with
   948  	// the ShardTracker such that it can derive the correct hash for the
   949  	// next attempt.
   950  	if err := p.shardTracker.CancelShard(attempt.AttemptID); err != nil {
   951  		return nil, err
   952  	}
   953  
   954  	return p.router.cfg.Control.FailAttempt(
   955  		p.identifier, attempt.AttemptID,
   956  		failInfo,
   957  	)
   958  }
   959  
   960  // marshallError marshall an error as received from the switch to a structure
   961  // that is suitable for database storage.
   962  func marshallError(sendError error, time time.Time) *channeldb.HTLCFailInfo {
   963  	response := &channeldb.HTLCFailInfo{
   964  		FailTime: time,
   965  	}
   966  
   967  	switch sendError {
   968  
   969  	case htlcswitch.ErrPaymentIDNotFound:
   970  		response.Reason = channeldb.HTLCFailInternal
   971  		return response
   972  
   973  	case htlcswitch.ErrUnreadableFailureMessage:
   974  		response.Reason = channeldb.HTLCFailUnreadable
   975  		return response
   976  	}
   977  
   978  	rtErr, ok := sendError.(htlcswitch.ClearTextError)
   979  	if !ok {
   980  		response.Reason = channeldb.HTLCFailInternal
   981  		return response
   982  	}
   983  
   984  	message := rtErr.WireMessage()
   985  	if message != nil {
   986  		response.Reason = channeldb.HTLCFailMessage
   987  		response.Message = message
   988  	} else {
   989  		response.Reason = channeldb.HTLCFailUnknown
   990  	}
   991  
   992  	// If the ClearTextError received is a ForwardingError, the error
   993  	// originated from a node along the route, not locally on our outgoing
   994  	// link. We set failureSourceIdx to the index of the node where the
   995  	// failure occurred. If the error is not a ForwardingError, the failure
   996  	// occurred at our node, so we leave the index as 0 to indicate that
   997  	// we failed locally.
   998  	fErr, ok := rtErr.(*htlcswitch.ForwardingError)
   999  	if ok {
  1000  		response.FailureSourceIndex = uint32(fErr.FailureSourceIdx)
  1001  	}
  1002  
  1003  	return response
  1004  }