github.com/decred/dcrlnd@v0.7.6/autopilot/agent.go (about)

     1  package autopilot
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"math/rand"
     7  	"net"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/davecgh/go-spew/spew"
    12  	"github.com/decred/dcrd/dcrec/secp256k1/v4"
    13  	"github.com/decred/dcrd/dcrutil/v4"
    14  	"github.com/decred/dcrlnd/lnwire"
    15  )
    16  
// Config couples all the items that an autopilot agent needs to function.
// All items within the struct MUST be populated for the Agent to be able to
// carry out its duties.
type Config struct {
	// Self is the identity public key of the Lightning Network node that
	// is being driven by the agent. This is used to ensure that we don't
	// accidentally attempt to open a channel with ourselves.
	Self *secp256k1.PublicKey

	// Heuristic is an attachment heuristic which will govern to whom we
	// open channels to, and also what those channels look like in terms of
	// desired capacity. The Heuristic will take into account the current
	// state of the graph, our set of open channels, and the amount of
	// available funds when determining how channels are to be opened.
	// Additionally, a heuristic may also factor in extra-graph
	// information in order to make more pertinent recommendations.
	Heuristic AttachmentHeuristic

	// ChanController is an interface that is able to directly manage the
	// creation, closing and update of channels within the network.
	ChanController ChannelController

	// ConnectToPeer attempts to connect to the peer using one of its
	// advertised addresses. The boolean returned signals whether the peer
	// was already connected.
	ConnectToPeer func(*secp256k1.PublicKey, []net.Addr) (bool, error)

	// DisconnectPeer attempts to disconnect the peer with the given public
	// key.
	DisconnectPeer func(*secp256k1.PublicKey) error

	// WalletBalance is a function closure that should return the current
	// available balance of the backing wallet.
	WalletBalance func() (dcrutil.Amount, error)

	// Graph is an abstract channel graph that the Heuristic and the Agent
	// will use to make decisions w.r.t channel allocation and placement
	// within the graph.
	Graph ChannelGraph

	// Constraints is the set of constraints the autopilot must adhere to
	// when opening channels.
	Constraints AgentConstraints

	// TODO(roasbeef): add additional signals from fee rates and revenue of
	// currently opened channels
}
    64  
// channelState is a type that represents the set of active channels of the
// backing LN node that the Agent should be aware of. This type contains a few
// helper utility methods. It maps a channel's short ID to its local view of
// the channel.
type channelState map[lnwire.ShortChannelID]LocalChannel
    69  
    70  // Channels returns a slice of all the active channels.
    71  func (c channelState) Channels() []LocalChannel {
    72  	chans := make([]LocalChannel, 0, len(c))
    73  	for _, channel := range c {
    74  		chans = append(chans, channel)
    75  	}
    76  	return chans
    77  }
    78  
    79  // ConnectedNodes returns the set of nodes we currently have a channel with.
    80  // This information is needed as we want to avoid making repeated channels with
    81  // any node.
    82  func (c channelState) ConnectedNodes() map[NodeID]struct{} {
    83  	nodes := make(map[NodeID]struct{})
    84  	for _, channels := range c {
    85  		nodes[channels.Node] = struct{}{}
    86  	}
    87  
    88  	// TODO(roasbeef): add outgoing, nodes, allow incoming and outgoing to
    89  	// per node
    90  	//  * only add node is chan as funding amt set
    91  
    92  	return nodes
    93  }
    94  
// Agent implements a closed-loop control system which seeks to autonomously
// optimize the allocation of base units within channels throughout the
// network's channel graph. An agent is configurable by swapping out different
// AttachmentHeuristic strategies. The agent uses external signals such as the
// wallet balance changing, or new channels being opened/closed for the local
// node as an indicator to re-examine its internal state, and the amount of
// available funds in order to make updated decisions w.r.t the channel graph.
// The Agent will automatically open, close, and splice in/out channels as
// necessary for it to step closer to its optimal state.
//
// TODO(roasbeef): prob re-word
type Agent struct {
	started sync.Once
	stopped sync.Once

	// cfg houses the configuration state of the Agent.
	cfg Config

	// chanState tracks the current set of open channels. Guarded by
	// chanStateMtx.
	chanState    channelState
	chanStateMtx sync.Mutex

	// stateUpdates is a channel that any external state updates that may
	// affect the heuristics of the agent will be sent over.
	stateUpdates chan interface{}

	// balanceUpdates is a channel where notifications about updates to the
	// wallet's balance will be sent. This channel will be buffered to
	// ensure we have at most one pending update of this type to handle at
	// a given time.
	balanceUpdates chan *balanceUpdate

	// nodeUpdates is a channel that changes to the graph node landscape
	// will be sent over. This channel will be buffered to ensure we have
	// at most one pending update of this type to handle at a given time.
	nodeUpdates chan *nodeUpdates

	// pendingOpenUpdates is a channel where updates about channel pending
	// opening will be sent. This channel will be buffered to ensure we
	// have at most one pending update of this type to handle at a given
	// time.
	pendingOpenUpdates chan *chanPendingOpenUpdate

	// chanOpenFailures is a channel where updates about channel open
	// failures will be sent. This channel will be buffered to ensure we
	// have at most one pending update of this type to handle at a given
	// time.
	chanOpenFailures chan *chanOpenFailureUpdate

	// heuristicUpdates is a channel where updates from active heuristics
	// will be sent.
	heuristicUpdates chan *heuristicUpdate

	// totalBalance is the total number of base units the backing wallet is
	// known to control at any given instance. This value will be updated
	// when the agent receives external balance update signals.
	totalBalance dcrutil.Amount

	// failedNodes lists nodes that we've previously attempted to initiate
	// channels with, but didn't succeed. Guarded by pendingMtx.
	failedNodes map[NodeID]struct{}

	// pendingConns tracks the nodes that we are attempting to make
	// connections to. This prevents us from making duplicate connection
	// requests to the same node. Guarded by pendingMtx.
	pendingConns map[NodeID]struct{}

	// pendingOpens tracks the channels that we've requested to be
	// initiated, but haven't yet been confirmed as being fully opened.
	// This state is required as otherwise, we may go over our allotted
	// channel limit, or open multiple channels to the same node.
	// Guarded by pendingMtx.
	pendingOpens map[NodeID]LocalChannel
	pendingMtx   sync.Mutex

	quit chan struct{}
	wg   sync.WaitGroup
}
   172  
   173  // New creates a new instance of the Agent instantiated using the passed
   174  // configuration and initial channel state. The initial channel state slice
   175  // should be populated with the set of Channels that are currently opened by
   176  // the backing Lightning Node.
   177  func New(cfg Config, initialState []LocalChannel) (*Agent, error) {
   178  	a := &Agent{
   179  		cfg:                cfg,
   180  		chanState:          make(map[lnwire.ShortChannelID]LocalChannel),
   181  		quit:               make(chan struct{}),
   182  		stateUpdates:       make(chan interface{}),
   183  		balanceUpdates:     make(chan *balanceUpdate, 1),
   184  		nodeUpdates:        make(chan *nodeUpdates, 1),
   185  		chanOpenFailures:   make(chan *chanOpenFailureUpdate, 1),
   186  		heuristicUpdates:   make(chan *heuristicUpdate, 1),
   187  		pendingOpenUpdates: make(chan *chanPendingOpenUpdate, 1),
   188  		failedNodes:        make(map[NodeID]struct{}),
   189  		pendingConns:       make(map[NodeID]struct{}),
   190  		pendingOpens:       make(map[NodeID]LocalChannel),
   191  	}
   192  
   193  	for _, c := range initialState {
   194  		a.chanState[c.ChanID] = c
   195  	}
   196  
   197  	return a, nil
   198  }
   199  
   200  // Start starts the agent along with any goroutines it needs to perform its
   201  // normal duties.
   202  func (a *Agent) Start() error {
   203  	var err error
   204  	a.started.Do(func() {
   205  		err = a.start()
   206  	})
   207  	return err
   208  }
   209  
   210  func (a *Agent) start() error {
   211  	rand.Seed(time.Now().Unix())
   212  	log.Infof("Autopilot Agent starting")
   213  
   214  	a.wg.Add(1)
   215  	go a.controller()
   216  
   217  	return nil
   218  }
   219  
   220  // Stop signals the Agent to gracefully shutdown. This function will block
   221  // until all goroutines have exited.
   222  func (a *Agent) Stop() error {
   223  	var err error
   224  	a.stopped.Do(func() {
   225  		err = a.stop()
   226  	})
   227  	return err
   228  }
   229  
// stop closes the quit channel to signal every agent goroutine to exit, then
// blocks until all of them have finished. It is invoked exactly once via the
// sync.Once in Stop.
func (a *Agent) stop() error {
	log.Infof("Autopilot Agent stopping")

	close(a.quit)
	a.wg.Wait()

	return nil
}
   238  
// balanceUpdate is a type of external state update that reflects an
// increase/decrease in the funds currently available to the wallet. It
// carries no payload; it is purely a wake-up signal.
type balanceUpdate struct {
}

// nodeUpdates is a type of external state update that reflects an addition or
// modification in channel graph node membership.
type nodeUpdates struct{}

// chanOpenUpdate is a type of external state update that indicates a new
// channel has been opened, either by the Agent itself (within the main
// controller loop), or by an external user to the system.
type chanOpenUpdate struct {
	// newChan is the local view of the channel that was just opened.
	newChan LocalChannel
}

// chanPendingOpenUpdate is a type of external state update that indicates a new
// channel has been opened, either by the agent itself or an external subsystem,
// but is still pending.
type chanPendingOpenUpdate struct{}

// chanOpenFailureUpdate is a type of external state update that indicates
// a previous channel open failed, and that it might be possible to try again.
type chanOpenFailureUpdate struct{}

// heuristicUpdate is an update sent when one of the autopilot heuristics has
// changed, and prompts the agent to make a new attempt at opening more
// channels.
type heuristicUpdate struct {
	// heuristic is the heuristic that was updated.
	heuristic AttachmentHeuristic
}

// chanCloseUpdate is a type of external state update that indicates that the
// backing Lightning Node has closed a previously open channel.
type chanCloseUpdate struct {
	// closedChans is the set of short channel IDs that were closed.
	closedChans []lnwire.ShortChannelID
}
   276  
// OnBalanceChange is a callback that should be executed each time the balance
// of the backing wallet changes. The send is non-blocking: balanceUpdates is
// buffered with capacity one, so if an update is already queued this
// notification is simply dropped (the pending one covers it).
func (a *Agent) OnBalanceChange() {
	select {
	case a.balanceUpdates <- &balanceUpdate{}:
	default:
	}
}
   285  
// OnNodeUpdates is a callback that should be executed each time our channel
// graph has new nodes or their node announcements are updated. The send is
// non-blocking: nodeUpdates is buffered with capacity one, so a notification
// is dropped if one is already pending.
func (a *Agent) OnNodeUpdates() {
	select {
	case a.nodeUpdates <- &nodeUpdates{}:
	default:
	}
}
   294  
   295  // OnChannelOpen is a callback that should be executed each time a new channel
   296  // is manually opened by the user or any system outside the autopilot agent.
   297  func (a *Agent) OnChannelOpen(c LocalChannel) {
   298  	a.wg.Add(1)
   299  	go func() {
   300  		defer a.wg.Done()
   301  
   302  		select {
   303  		case a.stateUpdates <- &chanOpenUpdate{newChan: c}:
   304  		case <-a.quit:
   305  		}
   306  	}()
   307  }
   308  
// OnChannelPendingOpen is a callback that should be executed each time a new
// channel is opened, either by the agent or an external subsystems, but is
// still pending. The send is non-blocking: pendingOpenUpdates is buffered
// with capacity one, so a notification is dropped if one is already pending.
func (a *Agent) OnChannelPendingOpen() {
	select {
	case a.pendingOpenUpdates <- &chanPendingOpenUpdate{}:
	default:
	}
}
   318  
// OnChannelOpenFailure is a callback that should be executed when the
// autopilot has attempted to open a channel, but failed. In this case we can
// retry channel creation with a different node. The send is non-blocking:
// chanOpenFailures is buffered with capacity one, so a notification is
// dropped if one is already pending.
func (a *Agent) OnChannelOpenFailure() {
	select {
	case a.chanOpenFailures <- &chanOpenFailureUpdate{}:
	default:
	}
}
   328  
   329  // OnChannelClose is a callback that should be executed each time a prior
   330  // channel has been closed for any reason. This includes regular
   331  // closes, force closes, and channel breaches.
   332  func (a *Agent) OnChannelClose(closedChans ...lnwire.ShortChannelID) {
   333  	a.wg.Add(1)
   334  	go func() {
   335  		defer a.wg.Done()
   336  
   337  		select {
   338  		case a.stateUpdates <- &chanCloseUpdate{closedChans: closedChans}:
   339  		case <-a.quit:
   340  		}
   341  	}()
   342  }
   343  
// OnHeuristicUpdate is a method called when a heuristic has been updated, to
// trigger the agent to do a new state assessment. The send is non-blocking:
// heuristicUpdates is buffered with capacity one, so a notification is
// dropped if one is already pending.
func (a *Agent) OnHeuristicUpdate(h AttachmentHeuristic) {
	select {
	case a.heuristicUpdates <- &heuristicUpdate{
		heuristic: h,
	}:
	default:
	}
}
   354  
   355  // mergeNodeMaps merges the Agent's set of nodes that it already has active
   356  // channels open to, with the other sets of nodes that should be removed from
   357  // consideration during heuristic selection. This ensures that the Agent doesn't
   358  // attempt to open any "duplicate" channels to the same node.
   359  func mergeNodeMaps(c map[NodeID]LocalChannel,
   360  	skips ...map[NodeID]struct{}) map[NodeID]struct{} {
   361  
   362  	numNodes := len(c)
   363  	for _, skip := range skips {
   364  		numNodes += len(skip)
   365  	}
   366  
   367  	res := make(map[NodeID]struct{}, numNodes)
   368  	for nodeID := range c {
   369  		res[nodeID] = struct{}{}
   370  	}
   371  	for _, skip := range skips {
   372  		for nodeID := range skip {
   373  			res[nodeID] = struct{}{}
   374  		}
   375  	}
   376  
   377  	return res
   378  }
   379  
   380  // mergeChanState merges the Agent's set of active channels, with the set of
   381  // channels awaiting confirmation. This ensures that the agent doesn't go over
   382  // the prescribed channel limit or fund allocation limit.
   383  func mergeChanState(pendingChans map[NodeID]LocalChannel,
   384  	activeChans channelState) []LocalChannel {
   385  
   386  	numChans := len(pendingChans) + len(activeChans)
   387  	totalChans := make([]LocalChannel, 0, numChans)
   388  
   389  	totalChans = append(totalChans, activeChans.Channels()...)
   390  	for _, pendingChan := range pendingChans {
   391  		totalChans = append(totalChans, pendingChan)
   392  	}
   393  
   394  	return totalChans
   395  }
   396  
// controller implements the closed-loop control system of the Agent. The
// controller will make a decision w.r.t channel placement within the graph
// based on: its current internal state of the set of active channels open,
// and external state changes as a result of decisions it makes w.r.t channel
// allocation, or attributes affecting its control loop being updated by the
// backing Lightning Node.
//
// NOTE: This MUST be run as a goroutine; it only exits once a.quit is closed.
func (a *Agent) controller() {
	defer a.wg.Done()

	// We'll start off by assigning our starting balance, and injecting
	// that amount as an initial wake up to the main controller goroutine.
	a.OnBalanceChange()

	// TODO(roasbeef): do we in fact need to maintain order?
	//  * use sync.Cond if so

	// updateBalance refreshes the cached wallet balance. A failure is
	// logged and otherwise ignored, leaving the previous value in place.
	updateBalance := func() {
		newBalance, err := a.cfg.WalletBalance()
		if err != nil {
			log.Warnf("unable to update wallet balance: %v", err)
			return
		}

		a.totalBalance = newBalance
	}

	// TODO(roasbeef): add 10-minute wake up timer
	for {
		// Each iteration waits for exactly one trigger, then falls
		// through to the common re-assessment logic below the select.
		select {
		// A new external signal has arrived. We'll use this to update
		// our internal state, then determine if we should trigger a
		// channel state modification (open/close, splice in/out).
		case signal := <-a.stateUpdates:
			log.Infof("Processing new external signal")

			switch update := signal.(type) {
			// A new channel has been opened successfully. This was
			// either opened by the Agent, or an external system
			// that is able to drive the Lightning Node.
			case *chanOpenUpdate:
				log.Debugf("New channel successfully opened, "+
					"updating state with: %v",
					spew.Sdump(update.newChan))

				// Move the channel from the pending-opens set
				// into the confirmed channel state.
				newChan := update.newChan
				a.chanStateMtx.Lock()
				a.chanState[newChan.ChanID] = newChan
				a.chanStateMtx.Unlock()

				a.pendingMtx.Lock()
				delete(a.pendingOpens, newChan.Node)
				a.pendingMtx.Unlock()

				updateBalance()
			// A channel has been closed, this may free up an
			// available slot, triggering a new channel update.
			case *chanCloseUpdate:
				log.Debugf("Applying closed channel "+
					"updates: %v",
					spew.Sdump(update.closedChans))

				a.chanStateMtx.Lock()
				for _, closedChan := range update.closedChans {
					delete(a.chanState, closedChan)
				}
				a.chanStateMtx.Unlock()

				updateBalance()
			}

		// A new channel has been opened by the agent or an external
		// subsystem, but is still pending confirmation.
		case <-a.pendingOpenUpdates:
			updateBalance()

		// The balance of the backing wallet has changed, if more funds
		// are now available, we may attempt to open up an additional
		// channel, or splice in funds to an existing one.
		case <-a.balanceUpdates:
			log.Debug("Applying external balance state update")

			updateBalance()

		// The channel we tried to open previously failed for whatever
		// reason.
		case <-a.chanOpenFailures:
			log.Debug("Retrying after previous channel open " +
				"failure.")

			updateBalance()

		// New nodes have been added to the graph or their node
		// announcements have been updated. We will consider opening
		// channels to these nodes if we haven't stabilized.
		case <-a.nodeUpdates:
			log.Debugf("Node updates received, assessing " +
				"need for more channels")

		// Any of the deployed heuristics has been updated, check
		// whether we have new channel candidates available.
		case upd := <-a.heuristicUpdates:
			log.Debugf("Heuristic %v updated, assessing need for "+
				"more channels", upd.heuristic.Name())

		// The agent has been signalled to exit, so we'll bail out
		// immediately.
		case <-a.quit:
			return
		}

		a.pendingMtx.Lock()
		log.Debugf("Pending channels: %v", spew.Sdump(a.pendingOpens))
		a.pendingMtx.Unlock()

		// With all the updates applied, we'll obtain a set of the
		// current active channels (confirmed channels), and also
		// factor in our set of unconfirmed channels.
		a.chanStateMtx.Lock()
		a.pendingMtx.Lock()
		totalChans := mergeChanState(a.pendingOpens, a.chanState)
		a.pendingMtx.Unlock()
		a.chanStateMtx.Unlock()

		// Now that we've updated our internal state, we'll consult our
		// channel attachment heuristic to determine if we can open
		// up any additional channels while staying within our
		// constraints.
		availableFunds, numChans := a.cfg.Constraints.ChannelBudget(
			totalChans, a.totalBalance,
		)
		switch {
		// No more channel slots available, nothing to do this round.
		case numChans == 0:
			continue

		// If the amount is too small, we don't want to attempt opening
		// another channel.
		case availableFunds == 0:
			continue
		case availableFunds < a.cfg.Constraints.MinChanSize():
			continue
		}

		log.Infof("Triggering attachment directive dispatch, "+
			"total_funds=%v", a.totalBalance)

		err := a.openChans(availableFunds, numChans, totalChans)
		if err != nil {
			log.Errorf("Unable to open channels: %v", err)
		}
	}
}
   547  
   548  // openChans queries the agent's heuristic for a set of channel candidates, and
   549  // attempts to open channels to them.
   550  func (a *Agent) openChans(availableFunds dcrutil.Amount, numChans uint32,
   551  	totalChans []LocalChannel) error {
   552  
   553  	// As channel size we'll use the maximum channel size available.
   554  	chanSize := a.cfg.Constraints.MaxChanSize()
   555  	if availableFunds < chanSize {
   556  		chanSize = availableFunds
   557  	}
   558  
   559  	if chanSize < a.cfg.Constraints.MinChanSize() {
   560  		return fmt.Errorf("not enough funds available to open a " +
   561  			"single channel")
   562  	}
   563  
   564  	// We're to attempt an attachment so we'll obtain the set of
   565  	// nodes that we currently have channels with so we avoid
   566  	// duplicate edges.
   567  	a.chanStateMtx.Lock()
   568  	connectedNodes := a.chanState.ConnectedNodes()
   569  	a.chanStateMtx.Unlock()
   570  
   571  	for nID := range connectedNodes {
   572  		log.Tracef("Skipping node %x with open channel", nID[:])
   573  	}
   574  
   575  	a.pendingMtx.Lock()
   576  
   577  	for nID := range a.pendingOpens {
   578  		log.Tracef("Skipping node %x with pending channel open", nID[:])
   579  	}
   580  
   581  	for nID := range a.pendingConns {
   582  		log.Tracef("Skipping node %x with pending connection", nID[:])
   583  	}
   584  
   585  	for nID := range a.failedNodes {
   586  		log.Tracef("Skipping failed node %v", nID[:])
   587  	}
   588  
   589  	nodesToSkip := mergeNodeMaps(a.pendingOpens,
   590  		a.pendingConns, connectedNodes, a.failedNodes,
   591  	)
   592  
   593  	a.pendingMtx.Unlock()
   594  
   595  	// Gather the set of all nodes in the graph, except those we
   596  	// want to skip.
   597  	selfPubBytes := a.cfg.Self.SerializeCompressed()
   598  	nodes := make(map[NodeID]struct{})
   599  	addresses := make(map[NodeID][]net.Addr)
   600  	if err := a.cfg.Graph.ForEachNode(func(node Node) error {
   601  		nID := NodeID(node.PubKey())
   602  
   603  		// If we come across ourselves, them we'll continue in
   604  		// order to avoid attempting to make a channel with
   605  		// ourselves.
   606  		if bytes.Equal(nID[:], selfPubBytes) {
   607  			log.Tracef("Skipping self node %x", nID[:])
   608  			return nil
   609  		}
   610  
   611  		// If the node has no known addresses, we cannot connect to it,
   612  		// so we'll skip it.
   613  		addrs := node.Addrs()
   614  		if len(addrs) == 0 {
   615  			log.Tracef("Skipping node %x since no addresses known",
   616  				nID[:])
   617  			return nil
   618  		}
   619  		addresses[nID] = addrs
   620  
   621  		// Additionally, if this node is in the blacklist, then
   622  		// we'll skip it.
   623  		if _, ok := nodesToSkip[nID]; ok {
   624  			log.Tracef("Skipping blacklisted node %x", nID[:])
   625  			return nil
   626  		}
   627  
   628  		nodes[nID] = struct{}{}
   629  		return nil
   630  	}); err != nil {
   631  		return fmt.Errorf("unable to get graph nodes: %v", err)
   632  	}
   633  
   634  	// Use the heuristic to calculate a score for each node in the
   635  	// graph.
   636  	log.Debugf("Scoring %d nodes for chan_size=%v", len(nodes), chanSize)
   637  	scores, err := a.cfg.Heuristic.NodeScores(
   638  		a.cfg.Graph, totalChans, chanSize, nodes,
   639  	)
   640  	if err != nil {
   641  		return fmt.Errorf("unable to calculate node scores : %v", err)
   642  	}
   643  
   644  	log.Debugf("Got scores for %d nodes", len(scores))
   645  
   646  	// Now use the score to make a weighted choice which nodes to attempt
   647  	// to open channels to.
   648  	scores, err = chooseN(numChans, scores)
   649  	if err != nil {
   650  		return fmt.Errorf("unable to make weighted choice: %v",
   651  			err)
   652  	}
   653  
   654  	chanCandidates := make(map[NodeID]*AttachmentDirective)
   655  	for nID := range scores {
   656  		log.Tracef("Creating attachment directive for chosen node %x",
   657  			nID[:])
   658  
   659  		// Track the available funds we have left.
   660  		if availableFunds < chanSize {
   661  			chanSize = availableFunds
   662  		}
   663  		availableFunds -= chanSize
   664  
   665  		// If we run out of funds, we can break early.
   666  		if chanSize < a.cfg.Constraints.MinChanSize() {
   667  			log.Tracef("Chan size %v too small to satisfy min "+
   668  				"channel size %v, breaking", chanSize,
   669  				a.cfg.Constraints.MinChanSize())
   670  			break
   671  		}
   672  
   673  		chanCandidates[nID] = &AttachmentDirective{
   674  			NodeID:  nID,
   675  			ChanAmt: chanSize,
   676  			Addrs:   addresses[nID],
   677  		}
   678  	}
   679  
   680  	if len(chanCandidates) == 0 {
   681  		log.Infof("No eligible candidates to connect to")
   682  		return nil
   683  	}
   684  
   685  	log.Infof("Attempting to execute channel attachment "+
   686  		"directives: %v", spew.Sdump(chanCandidates))
   687  
   688  	// Before proceeding, check to see if we have any slots
   689  	// available to open channels. If there are any, we will attempt
   690  	// to dispatch the retrieved directives since we can't be
   691  	// certain which ones may actually succeed. If too many
   692  	// connections succeed, they will be ignored and made
   693  	// available to future heuristic selections.
   694  	a.pendingMtx.Lock()
   695  	defer a.pendingMtx.Unlock()
   696  	if uint16(len(a.pendingOpens)) >= a.cfg.Constraints.MaxPendingOpens() {
   697  		log.Debugf("Reached cap of %v pending "+
   698  			"channel opens, will retry "+
   699  			"after success/failure",
   700  			a.cfg.Constraints.MaxPendingOpens())
   701  		return nil
   702  	}
   703  
   704  	// For each recommended attachment directive, we'll launch a
   705  	// new goroutine to attempt to carry out the directive. If any
   706  	// of these succeed, then we'll receive a new state update,
   707  	// taking us back to the top of our controller loop.
   708  	for _, chanCandidate := range chanCandidates {
   709  		// Skip candidates which we are already trying
   710  		// to establish a connection with.
   711  		nodeID := chanCandidate.NodeID
   712  		if _, ok := a.pendingConns[nodeID]; ok {
   713  			continue
   714  		}
   715  		a.pendingConns[nodeID] = struct{}{}
   716  
   717  		a.wg.Add(1)
   718  		go a.executeDirective(*chanCandidate)
   719  	}
   720  	return nil
   721  }
   722  
   723  // executeDirective attempts to connect to the channel candidate specified by
   724  // the given attachment directive, and open a channel of the given size.
   725  //
   726  // NOTE: MUST be run as a goroutine.
   727  func (a *Agent) executeDirective(directive AttachmentDirective) {
   728  	defer a.wg.Done()
   729  
   730  	// We'll start out by attempting to connect to the peer in order to
   731  	// begin the funding workflow.
   732  	nodeID := directive.NodeID
   733  	pub, err := secp256k1.ParsePubKey(nodeID[:])
   734  	if err != nil {
   735  		log.Errorf("Unable to parse pubkey %x: %v", nodeID, err)
   736  		return
   737  	}
   738  
   739  	connected := make(chan bool)
   740  	errChan := make(chan error)
   741  
   742  	// To ensure a call to ConnectToPeer doesn't block the agent from
   743  	// shutting down, we'll launch it in a non-waitgrouped goroutine, that
   744  	// will signal when a result is returned.
   745  	// TODO(halseth): use DialContext to cancel on transport level.
   746  	go func() {
   747  		alreadyConnected, err := a.cfg.ConnectToPeer(
   748  			pub, directive.Addrs,
   749  		)
   750  		if err != nil {
   751  			select {
   752  			case errChan <- err:
   753  			case <-a.quit:
   754  			}
   755  			return
   756  		}
   757  
   758  		select {
   759  		case connected <- alreadyConnected:
   760  		case <-a.quit:
   761  			return
   762  		}
   763  	}()
   764  
   765  	var alreadyConnected bool
   766  	select {
   767  	case alreadyConnected = <-connected:
   768  	case err = <-errChan:
   769  	case <-a.quit:
   770  		return
   771  	}
   772  
   773  	if err != nil {
   774  		log.Warnf("Unable to connect to %x: %v",
   775  			pub.SerializeCompressed(), err)
   776  
   777  		// Since we failed to connect to them, we'll mark them as
   778  		// failed so that we don't attempt to connect to them again.
   779  		a.pendingMtx.Lock()
   780  		delete(a.pendingConns, nodeID)
   781  		a.failedNodes[nodeID] = struct{}{}
   782  		a.pendingMtx.Unlock()
   783  
   784  		// Finally, we'll trigger the agent to select new peers to
   785  		// connect to.
   786  		a.OnChannelOpenFailure()
   787  
   788  		return
   789  	}
   790  
   791  	// The connection was successful, though before progressing we must
   792  	// check that we have not already met our quota for max pending open
   793  	// channels. This can happen if multiple directives were spawned but
   794  	// fewer slots were available, and other successful attempts finished
   795  	// first.
   796  	a.pendingMtx.Lock()
   797  	if uint16(len(a.pendingOpens)) >= a.cfg.Constraints.MaxPendingOpens() {
   798  		// Since we've reached our max number of pending opens, we'll
   799  		// disconnect this peer and exit. However, if we were
   800  		// previously connected to them, then we'll make sure to
   801  		// maintain the connection alive.
   802  		if alreadyConnected {
   803  			// Since we succeeded in connecting, we won't add this
   804  			// peer to the failed nodes map, but we will remove it
   805  			// from a.pendingConns so that it can be retried in the
   806  			// future.
   807  			delete(a.pendingConns, nodeID)
   808  			a.pendingMtx.Unlock()
   809  			return
   810  		}
   811  
   812  		err = a.cfg.DisconnectPeer(pub)
   813  		if err != nil {
   814  			log.Warnf("Unable to disconnect peer %x: %v",
   815  				pub.SerializeCompressed(), err)
   816  		}
   817  
   818  		// Now that we have disconnected, we can remove this node from
   819  		// our pending conns map, permitting subsequent connection
   820  		// attempts.
   821  		delete(a.pendingConns, nodeID)
   822  		a.pendingMtx.Unlock()
   823  		return
   824  	}
   825  
   826  	// If we were successful, we'll track this peer in our set of pending
   827  	// opens. We do this here to ensure we don't stall on selecting new
   828  	// peers if the connection attempt happens to take too long.
   829  	delete(a.pendingConns, nodeID)
   830  	a.pendingOpens[nodeID] = LocalChannel{
   831  		Balance: directive.ChanAmt,
   832  		Node:    nodeID,
   833  	}
   834  	a.pendingMtx.Unlock()
   835  
   836  	// We can then begin the funding workflow with this peer.
   837  	err = a.cfg.ChanController.OpenChannel(pub, directive.ChanAmt)
   838  	if err != nil {
   839  		log.Warnf("Unable to open channel to %x of %v: %v",
   840  			pub.SerializeCompressed(), directive.ChanAmt, err)
   841  
   842  		// As the attempt failed, we'll clear the peer from the set of
   843  		// pending opens and mark them as failed so we don't attempt to
   844  		// open a channel to them again.
   845  		a.pendingMtx.Lock()
   846  		delete(a.pendingOpens, nodeID)
   847  		a.failedNodes[nodeID] = struct{}{}
   848  		a.pendingMtx.Unlock()
   849  
   850  		// Trigger the agent to re-evaluate everything and possibly
   851  		// retry with a different node.
   852  		a.OnChannelOpenFailure()
   853  
   854  		// Finally, we should also disconnect the peer if we weren't
   855  		// already connected to them beforehand by an external
   856  		// subsystem.
   857  		if alreadyConnected {
   858  			return
   859  		}
   860  
   861  		err = a.cfg.DisconnectPeer(pub)
   862  		if err != nil {
   863  			log.Warnf("Unable to disconnect peer %x: %v",
   864  				pub.SerializeCompressed(), err)
   865  		}
   866  	}
   867  
   868  	// Since the channel open was successful and is currently pending,
   869  	// we'll trigger the autopilot agent to query for more peers.
   870  	// TODO(halseth): this triggers a new loop before all the new channels
   871  	// are added to the pending channels map. Should add before executing
   872  	// directive in goroutine?
   873  	a.OnChannelPendingOpen()
   874  }