github.com/decred/dcrlnd@v0.7.6/autopilot/agent.go

package autopilot

import (
	"bytes"
	"fmt"
	"math/rand"
	"net"
	"sync"
	"time"

	"github.com/davecgh/go-spew/spew"
	"github.com/decred/dcrd/dcrec/secp256k1/v4"
	"github.com/decred/dcrd/dcrutil/v4"
	"github.com/decred/dcrlnd/lnwire"
)

// Config couples all the items that an autopilot agent needs to function.
// All items within the struct MUST be populated for the Agent to be able to
// carry out its duties.
type Config struct {
	// Self is the identity public key of the Lightning Network node that
	// is being driven by the agent. This is used to ensure that we don't
	// accidentally attempt to open a channel with ourselves.
	Self *secp256k1.PublicKey

	// Heuristic is an attachment heuristic which will govern to whom we
	// open channels, and also what those channels look like in terms of
	// desired capacity. The Heuristic will take into account the current
	// state of the graph, our set of open channels, and the amount of
	// available funds when determining how channels are to be opened.
	// Additionally, a heuristic may also factor in extra-graph
	// information in order to make more pertinent recommendations.
	Heuristic AttachmentHeuristic

	// ChanController is an interface that is able to directly manage the
	// creation, closing and update of channels within the network.
	ChanController ChannelController

	// ConnectToPeer attempts to connect to the peer using one of its
	// advertised addresses. The boolean returned signals whether the peer
	// was already connected.
	ConnectToPeer func(*secp256k1.PublicKey, []net.Addr) (bool, error)

	// DisconnectPeer attempts to disconnect the peer with the given public
	// key.
	DisconnectPeer func(*secp256k1.PublicKey) error

	// WalletBalance is a function closure that should return the current
	// available balance of the backing wallet.
	WalletBalance func() (dcrutil.Amount, error)

	// Graph is an abstract channel graph that the Heuristic and the Agent
	// will use to make decisions w.r.t channel allocation and placement
	// within the graph.
	Graph ChannelGraph

	// Constraints is the set of constraints the autopilot must adhere to
	// when opening channels.
	Constraints AgentConstraints

	// TODO(roasbeef): add additional signals from fee rates and revenue of
	// currently opened channels
}
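
// exampleConfig is a hypothetical helper (illustrative only, not part of the
// agent's API) showing how a backing node might populate the Config. Every
// field must be set; the heuristic, controller, graph, constraint, and
// closure values are assumed to be supplied by the node's other subsystems.
func exampleConfig(self *secp256k1.PublicKey, heuristic AttachmentHeuristic,
	ctrl ChannelController, graph ChannelGraph,
	constraints AgentConstraints,
	connect func(*secp256k1.PublicKey, []net.Addr) (bool, error),
	disconnect func(*secp256k1.PublicKey) error,
	balance func() (dcrutil.Amount, error)) Config {

	return Config{
		Self:           self,
		Heuristic:      heuristic,
		ChanController: ctrl,
		ConnectToPeer:  connect,
		DisconnectPeer: disconnect,
		WalletBalance:  balance,
		Graph:          graph,
		Constraints:    constraints,
	}
}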
// channelState is a type that represents the set of active channels of the
// backing LN node that the Agent should be aware of. This type contains a few
// helper utility methods.
type channelState map[lnwire.ShortChannelID]LocalChannel

// Channels returns a slice of all the active channels.
func (c channelState) Channels() []LocalChannel {
	chans := make([]LocalChannel, 0, len(c))
	for _, channel := range c {
		chans = append(chans, channel)
	}
	return chans
}

// ConnectedNodes returns the set of nodes we currently have a channel with.
// This information is needed as we want to avoid making repeated channels with
// any node.
func (c channelState) ConnectedNodes() map[NodeID]struct{} {
	nodes := make(map[NodeID]struct{})
	for _, channels := range c {
		nodes[channels.Node] = struct{}{}
	}

	// TODO(roasbeef): add outgoing, nodes, allow incoming and outgoing to
	// per node
	//  * only add node is chan as funding amt set

	return nodes
}

// Agent implements a closed-loop control system which seeks to autonomously
// optimize the allocation of base units within channels throughout the
// network's channel graph. An agent is configurable by swapping out different
// AttachmentHeuristic strategies. The agent uses external signals such as the
// wallet balance changing, or new channels being opened/closed for the local
// node as an indicator to re-examine its internal state, and the amount of
// available funds in order to make updated decisions w.r.t the channel graph.
// The Agent will automatically open, close, and splice in/out channels as
// necessary for it to step closer to its optimal state.
//
// TODO(roasbeef): prob re-word
type Agent struct {
	started sync.Once
	stopped sync.Once

	// cfg houses the configuration state of the Agent.
	cfg Config

	// chanState tracks the current set of open channels.
	chanState    channelState
	chanStateMtx sync.Mutex

	// stateUpdates is a channel that any external state updates that may
	// affect the heuristics of the agent will be sent over.
	stateUpdates chan interface{}

	// balanceUpdates is a channel where notifications about updates to the
	// wallet's balance will be sent. This channel will be buffered to
	// ensure we have at most one pending update of this type to handle at
	// a given time.
	balanceUpdates chan *balanceUpdate

	// nodeUpdates is a channel that changes to the graph node landscape
	// will be sent over. This channel will be buffered to ensure we have
	// at most one pending update of this type to handle at a given time.
	nodeUpdates chan *nodeUpdates

	// pendingOpenUpdates is a channel where updates about pending channel
	// opens will be sent. This channel will be buffered to ensure we have
	// at most one pending update of this type to handle at a given time.
	pendingOpenUpdates chan *chanPendingOpenUpdate

	// chanOpenFailures is a channel where updates about channel open
	// failures will be sent. This channel will be buffered to ensure we
	// have at most one pending update of this type to handle at a given
	// time.
	chanOpenFailures chan *chanOpenFailureUpdate

	// heuristicUpdates is a channel where updates from active heuristics
	// will be sent.
	heuristicUpdates chan *heuristicUpdate

	// totalBalance is the total number of base units the backing wallet is
	// known to control at any given instance. This value will be updated
	// when the agent receives external balance update signals.
	totalBalance dcrutil.Amount

	// failedNodes lists nodes that we've previously attempted to initiate
	// channels with, but didn't succeed.
	failedNodes map[NodeID]struct{}

	// pendingConns tracks the nodes that we are attempting to make
	// connections to. This prevents us from making duplicate connection
	// requests to the same node.
	pendingConns map[NodeID]struct{}

	// pendingOpens tracks the channels that we've requested to be
	// initiated, but haven't yet been confirmed as being fully opened.
	// This state is required as otherwise, we may go over our allotted
	// channel limit, or open multiple channels to the same node.
	pendingOpens map[NodeID]LocalChannel
	pendingMtx   sync.Mutex

	quit chan struct{}
	wg   sync.WaitGroup
}
// New creates a new instance of the Agent using the passed configuration and
// initial channel state. The initial channel state slice should be populated
// with the set of Channels that are currently opened by the backing Lightning
// Node.
func New(cfg Config, initialState []LocalChannel) (*Agent, error) {
	a := &Agent{
		cfg:                cfg,
		chanState:          make(map[lnwire.ShortChannelID]LocalChannel),
		quit:               make(chan struct{}),
		stateUpdates:       make(chan interface{}),
		balanceUpdates:     make(chan *balanceUpdate, 1),
		nodeUpdates:        make(chan *nodeUpdates, 1),
		chanOpenFailures:   make(chan *chanOpenFailureUpdate, 1),
		heuristicUpdates:   make(chan *heuristicUpdate, 1),
		pendingOpenUpdates: make(chan *chanPendingOpenUpdate, 1),
		failedNodes:        make(map[NodeID]struct{}),
		pendingConns:       make(map[NodeID]struct{}),
		pendingOpens:       make(map[NodeID]LocalChannel),
	}

	for _, c := range initialState {
		a.chanState[c.ChanID] = c
	}

	return a, nil
}

// Start starts the agent along with any goroutines it needs to perform its
// normal duties.
func (a *Agent) Start() error {
	var err error
	a.started.Do(func() {
		err = a.start()
	})
	return err
}

func (a *Agent) start() error {
	rand.Seed(time.Now().Unix())
	log.Infof("Autopilot Agent starting")

	a.wg.Add(1)
	go a.controller()

	return nil
}

// Stop signals the Agent to gracefully shut down. This function will block
// until all goroutines have exited.
func (a *Agent) Stop() error {
	var err error
	a.stopped.Do(func() {
		err = a.stop()
	})
	return err
}

func (a *Agent) stop() error {
	log.Infof("Autopilot Agent stopping")

	close(a.quit)
	a.wg.Wait()

	return nil
}
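
// exampleLifecycle is a hypothetical helper (illustrative only) showing the
// expected lifecycle: New with the node's currently open channels, Start to
// launch the controller goroutine, and Stop, which blocks until all agent
// goroutines have exited. The cfg and initialState values are assumed to
// come from the backing node.
func exampleLifecycle(cfg Config, initialState []LocalChannel) error {
	agent, err := New(cfg, initialState)
	if err != nil {
		return err
	}
	if err := agent.Start(); err != nil {
		return err
	}

	// ... the node runs, forwarding external events to the agent ...

	return agent.Stop()
}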
// balanceUpdate is a type of external state update that reflects an
// increase/decrease in the funds currently available to the wallet.
type balanceUpdate struct{}

// nodeUpdates is a type of external state update that reflects an addition or
// modification in channel graph node membership.
type nodeUpdates struct{}

// chanOpenUpdate is a type of external state update that indicates a new
// channel has been opened, either by the Agent itself (within the main
// controller loop), or by a user external to the system.
type chanOpenUpdate struct {
	newChan LocalChannel
}

// chanPendingOpenUpdate is a type of external state update that indicates a new
// channel has been opened, either by the agent itself or an external subsystem,
// but is still pending.
type chanPendingOpenUpdate struct{}

// chanOpenFailureUpdate is a type of external state update that indicates
// a previous channel open failed, and that it might be possible to try again.
type chanOpenFailureUpdate struct{}

// heuristicUpdate is an update sent when one of the autopilot heuristics has
// changed, and prompts the agent to make a new attempt at opening more
// channels.
type heuristicUpdate struct {
	heuristic AttachmentHeuristic
}

// chanCloseUpdate is a type of external state update that indicates that the
// backing Lightning Node has closed a previously open channel.
type chanCloseUpdate struct {
	closedChans []lnwire.ShortChannelID
}

// OnBalanceChange is a callback that should be executed each time the balance
// of the backing wallet changes.
func (a *Agent) OnBalanceChange() {
	select {
	case a.balanceUpdates <- &balanceUpdate{}:
	default:
	}
}

// OnNodeUpdates is a callback that should be executed each time our channel
// graph has new nodes or their node announcements are updated.
func (a *Agent) OnNodeUpdates() {
	select {
	case a.nodeUpdates <- &nodeUpdates{}:
	default:
	}
}

// OnChannelOpen is a callback that should be executed each time a new channel
// is manually opened by the user or any system outside the autopilot agent.
func (a *Agent) OnChannelOpen(c LocalChannel) {
	a.wg.Add(1)
	go func() {
		defer a.wg.Done()

		select {
		case a.stateUpdates <- &chanOpenUpdate{newChan: c}:
		case <-a.quit:
		}
	}()
}

// OnChannelPendingOpen is a callback that should be executed each time a new
// channel is opened, either by the agent or an external subsystem, but is
// still pending.
func (a *Agent) OnChannelPendingOpen() {
	select {
	case a.pendingOpenUpdates <- &chanPendingOpenUpdate{}:
	default:
	}
}

// OnChannelOpenFailure is a callback that should be executed when the
// autopilot has attempted to open a channel, but failed. In this case we can
// retry channel creation with a different node.
func (a *Agent) OnChannelOpenFailure() {
	select {
	case a.chanOpenFailures <- &chanOpenFailureUpdate{}:
	default:
	}
}

// OnChannelClose is a callback that should be executed each time a prior
// channel has been closed for any reason. This includes regular
// closes, force closes, and channel breaches.
func (a *Agent) OnChannelClose(closedChans ...lnwire.ShortChannelID) {
	a.wg.Add(1)
	go func() {
		defer a.wg.Done()

		select {
		case a.stateUpdates <- &chanCloseUpdate{closedChans: closedChans}:
		case <-a.quit:
		}
	}()
}

// OnHeuristicUpdate is a method called when a heuristic has been updated, to
// trigger the agent to do a new state assessment.
func (a *Agent) OnHeuristicUpdate(h AttachmentHeuristic) {
	select {
	case a.heuristicUpdates <- &heuristicUpdate{
		heuristic: h,
	}:
	default:
	}
}
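
// exampleForwardEvents is a hypothetical wiring sketch (illustrative only)
// showing how external subsystems are expected to drive the callbacks above.
// The balance and node-update sends are non-blocking against buffered
// channels, so bursts of notifications coalesce into a single pending update.
func exampleForwardEvents(a *Agent, opened LocalChannel,
	closed []lnwire.ShortChannelID) {

	// A wallet notifier would invoke this on every balance change.
	a.OnBalanceChange()

	// The graph subsystem would invoke this as node announcements arrive.
	a.OnNodeUpdates()

	// The funding manager would report channels opened outside the agent.
	a.OnChannelOpen(opened)

	// The channel arbiter would report closes of any kind: regular closes,
	// force closes, and breaches alike.
	a.OnChannelClose(closed...)
}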
// mergeNodeMaps merges the Agent's set of nodes that it already has active
// channels open to, with the other sets of nodes that should be removed from
// consideration during heuristic selection. This ensures that the Agent doesn't
// attempt to open any "duplicate" channels to the same node.
func mergeNodeMaps(c map[NodeID]LocalChannel,
	skips ...map[NodeID]struct{}) map[NodeID]struct{} {

	numNodes := len(c)
	for _, skip := range skips {
		numNodes += len(skip)
	}

	res := make(map[NodeID]struct{}, numNodes)
	for nodeID := range c {
		res[nodeID] = struct{}{}
	}
	for _, skip := range skips {
		for nodeID := range skip {
			res[nodeID] = struct{}{}
		}
	}

	return res
}

// mergeChanState merges the Agent's set of active channels, with the set of
// channels awaiting confirmation. This ensures that the agent doesn't go over
// the prescribed channel limit or fund allocation limit.
func mergeChanState(pendingChans map[NodeID]LocalChannel,
	activeChans channelState) []LocalChannel {

	numChans := len(pendingChans) + len(activeChans)
	totalChans := make([]LocalChannel, 0, numChans)

	totalChans = append(totalChans, activeChans.Channels()...)
	for _, pendingChan := range pendingChans {
		totalChans = append(totalChans, pendingChan)
	}

	return totalChans
}
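
// exampleMergeNodeMaps is a hypothetical helper (illustrative only, assuming
// NodeID is a fixed-size byte array) demonstrating that a node appearing in
// both the pending-opens map and a skip set is emitted only once, since the
// result is a set.
func exampleMergeNodeMaps() map[NodeID]struct{} {
	var n1, n2 NodeID
	n1[0], n2[0] = 0x01, 0x02

	pendingOpens := map[NodeID]LocalChannel{n1: {Node: n1}}
	connected := map[NodeID]struct{}{n1: {}, n2: {}}

	// The result contains exactly n1 and n2.
	return mergeNodeMaps(pendingOpens, connected)
}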
// controller implements the closed-loop control system of the Agent. The
// controller will make a decision w.r.t channel placement within the graph
// based on its current internal state of the set of active channels open,
// and on external state changes: either the results of decisions it makes
// w.r.t channel allocation, or attributes affecting its control loop being
// updated by the backing Lightning Node.
func (a *Agent) controller() {
	defer a.wg.Done()

	// We'll start off by assigning our starting balance, and injecting
	// that amount as an initial wake-up to the main controller goroutine.
	a.OnBalanceChange()

	// TODO(roasbeef): do we in fact need to maintain order?
	//  * use sync.Cond if so
	updateBalance := func() {
		newBalance, err := a.cfg.WalletBalance()
		if err != nil {
			log.Warnf("unable to update wallet balance: %v", err)
			return
		}

		a.totalBalance = newBalance
	}

	// TODO(roasbeef): add 10-minute wake up timer
	for {
		select {
		// A new external signal has arrived. We'll use this to update
		// our internal state, then determine if we should trigger a
		// channel state modification (open/close, splice in/out).
		case signal := <-a.stateUpdates:
			log.Infof("Processing new external signal")

			switch update := signal.(type) {
			// A new channel has been opened successfully. This was
			// either opened by the Agent, or an external system
			// that is able to drive the Lightning Node.
			case *chanOpenUpdate:
				log.Debugf("New channel successfully opened, "+
					"updating state with: %v",
					spew.Sdump(update.newChan))

				newChan := update.newChan
				a.chanStateMtx.Lock()
				a.chanState[newChan.ChanID] = newChan
				a.chanStateMtx.Unlock()

				a.pendingMtx.Lock()
				delete(a.pendingOpens, newChan.Node)
				a.pendingMtx.Unlock()

				updateBalance()

			// A channel has been closed; this may free up an
			// available slot, triggering a new channel update.
			case *chanCloseUpdate:
				log.Debugf("Applying closed channel "+
					"updates: %v",
					spew.Sdump(update.closedChans))

				a.chanStateMtx.Lock()
				for _, closedChan := range update.closedChans {
					delete(a.chanState, closedChan)
				}
				a.chanStateMtx.Unlock()

				updateBalance()
			}

		// A new channel has been opened by the agent or an external
		// subsystem, but is still pending confirmation.
		case <-a.pendingOpenUpdates:
			updateBalance()

		// The balance of the backing wallet has changed; if more funds
		// are now available, we may attempt to open up an additional
		// channel, or splice in funds to an existing one.
		case <-a.balanceUpdates:
			log.Debug("Applying external balance state update")

			updateBalance()

		// The channel we tried to open previously failed for whatever
		// reason.
		case <-a.chanOpenFailures:
			log.Debug("Retrying after previous channel open " +
				"failure.")

			updateBalance()

		// New nodes have been added to the graph or their node
		// announcements have been updated. We will consider opening
		// channels to these nodes if we haven't stabilized.
		case <-a.nodeUpdates:
			log.Debugf("Node updates received, assessing " +
				"need for more channels")

		// Any of the deployed heuristics has been updated; check
		// whether we have new channel candidates available.
		case upd := <-a.heuristicUpdates:
			log.Debugf("Heuristic %v updated, assessing need for "+
				"more channels", upd.heuristic.Name())

		// The agent has been signalled to exit, so we'll bail out
		// immediately.
		case <-a.quit:
			return
		}

		a.pendingMtx.Lock()
		log.Debugf("Pending channels: %v", spew.Sdump(a.pendingOpens))
		a.pendingMtx.Unlock()

		// With all the updates applied, we'll obtain a set of the
		// current active channels (confirmed channels), and also
		// factor in our set of unconfirmed channels.
		a.chanStateMtx.Lock()
		a.pendingMtx.Lock()
		totalChans := mergeChanState(a.pendingOpens, a.chanState)
		a.pendingMtx.Unlock()
		a.chanStateMtx.Unlock()

		// Now that we've updated our internal state, we'll consult our
		// channel attachment heuristic to determine if we can open
		// up any additional channels while staying within our
		// constraints.
		availableFunds, numChans := a.cfg.Constraints.ChannelBudget(
			totalChans, a.totalBalance,
		)
		switch {
		case numChans == 0:
			continue

		// If the amount is too small, we don't want to attempt opening
		// another channel.
		case availableFunds == 0:
			continue
		case availableFunds < a.cfg.Constraints.MinChanSize():
			continue
		}

		log.Infof("Triggering attachment directive dispatch, "+
			"total_funds=%v", a.totalBalance)

		err := a.openChans(availableFunds, numChans, totalChans)
		if err != nil {
			log.Errorf("Unable to open channels: %v", err)
		}
	}
}
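
// exampleChanSize is a hypothetical helper (illustrative only) mirroring the
// sizing rule applied at the top of openChans below: start from the
// constraints' maximum channel size and clamp it down to the funds actually
// available.
func exampleChanSize(constraints AgentConstraints,
	available dcrutil.Amount) dcrutil.Amount {

	chanSize := constraints.MaxChanSize()
	if available < chanSize {
		chanSize = available
	}
	return chanSize
}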
// openChans queries the agent's heuristic for a set of channel candidates, and
// attempts to open channels to them.
func (a *Agent) openChans(availableFunds dcrutil.Amount, numChans uint32,
	totalChans []LocalChannel) error {

	// For the channel size, we'll use the maximum channel size available.
	chanSize := a.cfg.Constraints.MaxChanSize()
	if availableFunds < chanSize {
		chanSize = availableFunds
	}

	if chanSize < a.cfg.Constraints.MinChanSize() {
		return fmt.Errorf("not enough funds available to open a " +
			"single channel")
	}

	// We're about to attempt an attachment, so we'll obtain the set of
	// nodes that we currently have channels with so we avoid
	// duplicate edges.
	a.chanStateMtx.Lock()
	connectedNodes := a.chanState.ConnectedNodes()
	a.chanStateMtx.Unlock()

	for nID := range connectedNodes {
		log.Tracef("Skipping node %x with open channel", nID[:])
	}

	a.pendingMtx.Lock()

	for nID := range a.pendingOpens {
		log.Tracef("Skipping node %x with pending channel open", nID[:])
	}

	for nID := range a.pendingConns {
		log.Tracef("Skipping node %x with pending connection", nID[:])
	}

	for nID := range a.failedNodes {
		log.Tracef("Skipping failed node %x", nID[:])
	}

	nodesToSkip := mergeNodeMaps(a.pendingOpens,
		a.pendingConns, connectedNodes, a.failedNodes,
	)

	a.pendingMtx.Unlock()

	// Gather the set of all nodes in the graph, except those we
	// want to skip.
	selfPubBytes := a.cfg.Self.SerializeCompressed()
	nodes := make(map[NodeID]struct{})
	addresses := make(map[NodeID][]net.Addr)
	if err := a.cfg.Graph.ForEachNode(func(node Node) error {
		nID := NodeID(node.PubKey())

		// If we come across ourselves, then we'll continue in
		// order to avoid attempting to make a channel with
		// ourselves.
		if bytes.Equal(nID[:], selfPubBytes) {
			log.Tracef("Skipping self node %x", nID[:])
			return nil
		}

		// If the node has no known addresses, we cannot connect to it,
		// so we'll skip it.
		addrs := node.Addrs()
		if len(addrs) == 0 {
			log.Tracef("Skipping node %x since no addresses known",
				nID[:])
			return nil
		}
		addresses[nID] = addrs

		// Additionally, if this node is in the blacklist, then
		// we'll skip it.
		if _, ok := nodesToSkip[nID]; ok {
			log.Tracef("Skipping blacklisted node %x", nID[:])
			return nil
		}

		nodes[nID] = struct{}{}
		return nil
	}); err != nil {
		return fmt.Errorf("unable to get graph nodes: %v", err)
	}

	// Use the heuristic to calculate a score for each node in the
	// graph.
	log.Debugf("Scoring %d nodes for chan_size=%v", len(nodes), chanSize)
	scores, err := a.cfg.Heuristic.NodeScores(
		a.cfg.Graph, totalChans, chanSize, nodes,
	)
	if err != nil {
		return fmt.Errorf("unable to calculate node scores: %v", err)
	}

	log.Debugf("Got scores for %d nodes", len(scores))

	// Now use the scores to make a weighted choice of which nodes to
	// attempt to open channels to.
	scores, err = chooseN(numChans, scores)
	if err != nil {
		return fmt.Errorf("unable to make weighted choice: %v",
			err)
	}

	chanCandidates := make(map[NodeID]*AttachmentDirective)
	for nID := range scores {
		log.Tracef("Creating attachment directive for chosen node %x",
			nID[:])

		// Track the available funds we have left.
		if availableFunds < chanSize {
			chanSize = availableFunds
		}
		availableFunds -= chanSize

		// If we run out of funds, we can break early.
		if chanSize < a.cfg.Constraints.MinChanSize() {
			log.Tracef("Chan size %v too small to satisfy min "+
				"channel size %v, breaking", chanSize,
				a.cfg.Constraints.MinChanSize())
			break
		}

		chanCandidates[nID] = &AttachmentDirective{
			NodeID:  nID,
			ChanAmt: chanSize,
			Addrs:   addresses[nID],
		}
	}

	if len(chanCandidates) == 0 {
		log.Infof("No eligible candidates to connect to")
		return nil
	}

	log.Infof("Attempting to execute channel attachment "+
		"directives: %v", spew.Sdump(chanCandidates))

	// Before proceeding, check to see if we have any slots
	// available to open channels. If there are any, we will attempt
	// to dispatch the retrieved directives since we can't be
	// certain which ones may actually succeed. If too many
	// connections succeed, they will be ignored and made
	// available to future heuristic selections.
	a.pendingMtx.Lock()
	defer a.pendingMtx.Unlock()
	if uint16(len(a.pendingOpens)) >= a.cfg.Constraints.MaxPendingOpens() {
		log.Debugf("Reached cap of %v pending "+
			"channel opens, will retry "+
			"after success/failure",
			a.cfg.Constraints.MaxPendingOpens())
		return nil
	}

	// For each recommended attachment directive, we'll launch a
	// new goroutine to attempt to carry out the directive. If any
	// of these succeed, then we'll receive a new state update,
	// taking us back to the top of our controller loop.
	for _, chanCandidate := range chanCandidates {
		// Skip candidates which we are already trying
		// to establish a connection with.
		nodeID := chanCandidate.NodeID
		if _, ok := a.pendingConns[nodeID]; ok {
			continue
		}
		a.pendingConns[nodeID] = struct{}{}

		a.wg.Add(1)
		go a.executeDirective(*chanCandidate)
	}
	return nil
}
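
// exampleNonBlockingCall is a hypothetical helper (illustrative only)
// isolating the pattern used by executeDirective below: a potentially slow
// call runs in its own goroutine so that shutdown via the quit channel is
// never blocked on it. The buffered channel lets the goroutine finish even
// if the caller has already quit.
func exampleNonBlockingCall(call func() error, quit chan struct{}) error {
	errChan := make(chan error, 1)
	go func() {
		errChan <- call()
	}()

	select {
	case err := <-errChan:
		return err
	case <-quit:
		return fmt.Errorf("shutting down")
	}
}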
// executeDirective attempts to connect to the channel candidate specified by
// the given attachment directive, and open a channel of the given size.
//
// NOTE: MUST be run as a goroutine.
func (a *Agent) executeDirective(directive AttachmentDirective) {
	defer a.wg.Done()

	// We'll start out by attempting to connect to the peer in order to
	// begin the funding workflow.
	nodeID := directive.NodeID
	pub, err := secp256k1.ParsePubKey(nodeID[:])
	if err != nil {
		log.Errorf("Unable to parse pubkey %x: %v", nodeID, err)
		return
	}

	connected := make(chan bool)
	errChan := make(chan error)

	// To ensure a call to ConnectToPeer doesn't block the agent from
	// shutting down, we'll launch it in a non-waitgrouped goroutine that
	// will signal when a result is returned.
	// TODO(halseth): use DialContext to cancel on transport level.
	go func() {
		alreadyConnected, err := a.cfg.ConnectToPeer(
			pub, directive.Addrs,
		)
		if err != nil {
			select {
			case errChan <- err:
			case <-a.quit:
			}
			return
		}

		select {
		case connected <- alreadyConnected:
		case <-a.quit:
			return
		}
	}()

	var alreadyConnected bool
	select {
	case alreadyConnected = <-connected:
	case err = <-errChan:
	case <-a.quit:
		return
	}

	if err != nil {
		log.Warnf("Unable to connect to %x: %v",
			pub.SerializeCompressed(), err)

		// Since we failed to connect to them, we'll mark them as
		// failed so that we don't attempt to connect to them again.
		a.pendingMtx.Lock()
		delete(a.pendingConns, nodeID)
		a.failedNodes[nodeID] = struct{}{}
		a.pendingMtx.Unlock()

		// Finally, we'll trigger the agent to select new peers to
		// connect to.
		a.OnChannelOpenFailure()

		return
	}

	// The connection was successful, though before progressing we must
	// check that we have not already met our quota for max pending open
	// channels. This can happen if multiple directives were spawned but
	// fewer slots were available, and other successful attempts finished
	// first.
	a.pendingMtx.Lock()
	if uint16(len(a.pendingOpens)) >= a.cfg.Constraints.MaxPendingOpens() {
		// Since we've reached our max number of pending opens, we'll
		// disconnect this peer and exit. However, if we were
		// previously connected to them, then we'll keep the
		// connection alive.
		if alreadyConnected {
			// Since we succeeded in connecting, we won't add this
			// peer to the failed nodes map, but we will remove it
			// from a.pendingConns so that it can be retried in the
			// future.
			delete(a.pendingConns, nodeID)
			a.pendingMtx.Unlock()
			return
		}

		err = a.cfg.DisconnectPeer(pub)
		if err != nil {
			log.Warnf("Unable to disconnect peer %x: %v",
				pub.SerializeCompressed(), err)
		}

		// Now that we have disconnected, we can remove this node from
		// our pending conns map, permitting subsequent connection
		// attempts.
		delete(a.pendingConns, nodeID)
		a.pendingMtx.Unlock()
		return
	}

	// If we were successful, we'll track this peer in our set of pending
	// opens. We do this here to ensure we don't stall on selecting new
	// peers if the connection attempt happens to take too long.
	delete(a.pendingConns, nodeID)
	a.pendingOpens[nodeID] = LocalChannel{
		Balance: directive.ChanAmt,
		Node:    nodeID,
	}
	a.pendingMtx.Unlock()

	// We can then begin the funding workflow with this peer.
	err = a.cfg.ChanController.OpenChannel(pub, directive.ChanAmt)
	if err != nil {
		log.Warnf("Unable to open channel to %x of %v: %v",
			pub.SerializeCompressed(), directive.ChanAmt, err)

		// As the attempt failed, we'll clear the peer from the set of
		// pending opens and mark them as failed so we don't attempt to
		// open a channel to them again.
		a.pendingMtx.Lock()
		delete(a.pendingOpens, nodeID)
		a.failedNodes[nodeID] = struct{}{}
		a.pendingMtx.Unlock()

		// Trigger the agent to re-evaluate everything and possibly
		// retry with a different node.
		a.OnChannelOpenFailure()

		// Finally, we should also disconnect the peer if we weren't
		// already connected to them beforehand by an external
		// subsystem.
		if alreadyConnected {
			return
		}

		err = a.cfg.DisconnectPeer(pub)
		if err != nil {
			log.Warnf("Unable to disconnect peer %x: %v",
				pub.SerializeCompressed(), err)
		}

		// The open attempt failed, so don't signal a pending open.
		return
	}

	// Since the channel open was successful and is currently pending,
	// we'll trigger the autopilot agent to query for more peers.
	// TODO(halseth): this triggers a new loop before all the new channels
	// are added to the pending channels map. Should add before executing
	// directive in goroutine?
	a.OnChannelPendingOpen()
}
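
// exampleReportFundingOutcome is a hypothetical wiring sketch (illustrative
// only) of how a backing node might report the eventual outcome of a funding
// flow started by executeDirective: a success is fed back through
// OnChannelOpen, whose handler clears the pendingOpens entry, while a failure
// is fed back through OnChannelOpenFailure so the agent can retry with a
// different candidate.
func exampleReportFundingOutcome(a *Agent, c LocalChannel, err error) {
	if err != nil {
		// Lets the agent re-evaluate and pick a different node.
		a.OnChannelOpenFailure()
		return
	}

	// Moves the channel from pendingOpens into chanState.
	a.OnChannelOpen(c)
}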