github.com/palcoin-project/palcd@v1.0.0/connmgr/connmanager.go (about)

     1  // Copyright (c) 2016 The btcsuite developers
     2  // Use of this source code is governed by an ISC
     3  // license that can be found in the LICENSE file.
     4  
     5  package connmgr
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"net"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  )
    15  
    16  // maxFailedAttempts is the maximum number of successive failed connection
    17  // attempts after which network failure is assumed and new connections will
    18  // be delayed by the configured retry duration.
        //
        // The running count is kept in ConnManager.failedAttempts.  It is only
        // incremented for non-permanent requests when GetNewAddress is configured,
        // and the connection handler resets it to zero whenever a connection is
        // established.
    19  const maxFailedAttempts = 25
    20  
    21  var (
    22  	// ErrDialNil is used to indicate that Dial cannot be nil in the configuration.
    23  	ErrDialNil = errors.New("Config: Dial cannot be nil")
    24  
    25  	// maxRetryDuration is the upper bound on the delay between retries of a
    26  	// persistent connection.  The cap is necessary because the retry logic
    27  	// uses a linear backoff: the delay is the configured retry duration
    28  	// multiplied by the number of retries that have been done so far.
    29  	maxRetryDuration = time.Minute * 5
    30  
    31  	// defaultRetryDuration is the retry duration New falls back to when the
    32  	// configured RetryDuration is zero or negative.
    33  	defaultRetryDuration = time.Second * 5
    34  
    35  	// defaultTargetOutbound is the number of outbound connections New falls
    36  	// back to when the configured TargetOutbound is zero.
    37  	defaultTargetOutbound = uint32(8)
    38  )
    39  
    40  // ConnState represents the state of the requested connection.
    41  type ConnState uint8
    42  
    43  // ConnState can be either pending, failing, canceled, established or
    44  // disconnected.  When a new connection is requested, it is attempted and
    45  // categorized as established or failing depending on the connection result.
    46  // An established connection which was removed is categorized as disconnected.
        // A request that is removed while it is still pending is categorized as
        // canceled.
    47  const (
        	// ConnPending is set when a request is registered with the connection
        	// handler, and again when a disconnected request is queued for a
        	// reconnection attempt.
    48  	ConnPending ConnState = iota
        	// ConnFailing is set when an attempt for a pending request reports an
        	// error.
    49  	ConnFailing
        	// ConnCanceled is set when a request is disconnected or removed while
        	// it is still pending; Connect ignores requests in this state.
    50  	ConnCanceled
        	// ConnEstablished is set when a pending request connects successfully.
    51  	ConnEstablished
        	// ConnDisconnected is set when an established connection is removed
        	// without a retry being requested.
    52  	ConnDisconnected
    53  )
    54  
    55  // ConnReq is the connection request to a network address. If permanent, the
    56  // connection will be retried on disconnection.
    57  type ConnReq struct {
    58  	// The following variables must only be used atomically.
        	//
        	// id is the identifier assigned from ConnManager.connReqCount.  A zero
        	// id means the request has not been registered yet (see Connect).
    59  	id uint64
    60  
        	// Addr is the address handed to the configured Dial function.
    61  	Addr      net.Addr
        	// Permanent marks a request that is retried with a growing delay after
        	// a failure or a disconnect, instead of being replaced by a new
        	// request.
    62  	Permanent bool
    63  
        	// conn is the established connection; it is stored by the connection
        	// handler once the dial succeeds.
    64  	conn       net.Conn
        	// state is the current connection state and is guarded by stateMtx.
    65  	state      ConnState
    66  	stateMtx   sync.RWMutex
        	// retryCount is the number of retries scheduled since the last
        	// successful connection.  It scales the retry delay and is reset to
        	// zero by the connection handler on success.
    67  	retryCount uint32
    68  }
    69  
    70  // updateState updates the state of the connection request.
    71  func (c *ConnReq) updateState(state ConnState) {
    72  	c.stateMtx.Lock()
    73  	c.state = state
    74  	c.stateMtx.Unlock()
    75  }
    76  
    77  // ID returns a unique identifier for the connection request.
    78  func (c *ConnReq) ID() uint64 {
    79  	return atomic.LoadUint64(&c.id)
    80  }
    81  
    82  // State is the connection state of the requested connection.
    83  func (c *ConnReq) State() ConnState {
    84  	c.stateMtx.RLock()
    85  	state := c.state
    86  	c.stateMtx.RUnlock()
    87  	return state
    88  }
    89  
    90  // String returns a human-readable string for the connection request.
    91  func (c *ConnReq) String() string {
    92  	if c.Addr == nil || c.Addr.String() == "" {
    93  		return fmt.Sprintf("reqid %d", atomic.LoadUint64(&c.id))
    94  	}
    95  	return fmt.Sprintf("%s (reqid %d)", c.Addr, atomic.LoadUint64(&c.id))
    96  }
    97  
    98  // Config holds the configuration options related to the connection manager.
    99  type Config struct {
   100  	// Listeners defines a slice of listeners for which the connection
   101  	// manager will take ownership of and accept connections.  When a
   102  	// connection is accepted, the OnAccept handler will be invoked with the
   103  	// connection.  Since the connection manager takes ownership of these
   104  	// listeners, they will be closed when the connection manager is
   105  	// stopped.
   106  	//
   107  	// This field will not have any effect if the OnAccept field is not
   108  	// also specified.  It may be nil if the caller does not wish to listen
   109  	// for incoming connections.
   110  	Listeners []net.Listener
   111  
   112  	// OnAccept is a callback that is fired when an inbound connection is
   113  	// accepted.  It is the caller's responsibility to close the connection.
   114  	// Failure to close the connection will result in the connection manager
   115  	// believing the connection is still active and thus having undesirable
   116  	// side effects such as still counting toward maximum connection limits.
   117  	//
   118  	// This field will not have any effect if the Listeners field is not
   119  	// also specified since there couldn't possibly be any accepted
   120  	// connections in that case.
        	//
        	// Each invocation runs in its own goroutine.
   121  	OnAccept func(net.Conn)
   122  
   123  	// TargetOutbound is the number of outbound network connections to
   124  	// maintain.  A value of 0 selects the default of 8.
   125  	TargetOutbound uint32
   126  
   127  	// RetryDuration is the duration to wait before retrying connection
   128  	// requests.  A zero or negative value selects the default of 5s.
   129  	RetryDuration time.Duration
   130  
   131  	// OnConnection is a callback that is fired when a new outbound
   132  	// connection is established.  It runs in its own goroutine.
   133  	OnConnection func(*ConnReq, net.Conn)
   134  
   135  	// OnDisconnection is a callback that is fired when an outbound
   136  	// connection is disconnected.  It runs in its own goroutine.
   137  	OnDisconnection func(*ConnReq)
   138  
   139  	// GetNewAddress is a way to get an address to make a network connection
   140  	// to.  If nil, no new connections will be made automatically.
   141  	GetNewAddress func() (net.Addr, error)
   142  
   143  	// Dial connects to the given address.  It cannot be nil.
   144  	Dial func(net.Addr) (net.Conn, error)
   145  }
   146  
   147  // registerPending is used to register a pending connection attempt. By
   148  // registering pending connection attempts we allow callers to cancel pending
   149  // connection attempts before they succeed or in case they are no longer
   150  // wanted.
   151  type registerPending struct {
        	// c is the request to add to the handler's pending map.
   152  	c    *ConnReq
        	// done is closed by the connection handler once c has been added.
   153  	done chan struct{}
   154  }
   155  
   156  // handleConnected is used to queue a successful connection.
   157  type handleConnected struct {
        	// c is the request whose dial succeeded.
   158  	c    *ConnReq
        	// conn is the connection returned by the configured Dial function.
   159  	conn net.Conn
   160  }
   161  
   162  // handleDisconnected is used to remove a connection.
   163  type handleDisconnected struct {
        	// id identifies the connection request to disconnect.
   164  	id    uint64
        	// retry is true when sent by Disconnect, which allows a reconnection
        	// attempt, and false when sent by Remove.
   165  	retry bool
   166  }
   167  
   168  // handleFailed is used to report a failed attempt for a pending connection.
   169  type handleFailed struct {
        	// c is the request whose attempt failed.
   170  	c   *ConnReq
        	// err is the error reported by GetNewAddress or Dial.
   171  	err error
   172  }
   173  
   174  // ConnManager provides a manager to handle network connections.
   175  type ConnManager struct {
   176  	// The following variables must only be used atomically.
        	//
        	// connReqCount is the source of connection request ids; start and stop
        	// are counters used by Start and Stop to make each take effect once.
   177  	connReqCount uint64
   178  	start        int32
   179  	stop         int32
   180  
        	// cfg is the manager's own copy of the configuration passed to New.
   181  	cfg            Config
        	// wg tracks the connection handler and listener goroutines.
   182  	wg             sync.WaitGroup
        	// failedAttempts counts failed attempts for non-permanent requests
        	// since the last successful connection (see maxFailedAttempts).
   183  	failedAttempts uint64
        	// requests carries registerPending, handleConnected,
        	// handleDisconnected and handleFailed messages to the connection
        	// handler.
   184  	requests       chan interface{}
        	// quit is closed by Stop to signal shutdown.
   185  	quit           chan struct{}
   186  }
   187  
   188  // handleFailedConn handles a connection failed due to a disconnect or any
   189  // other failure. If permanent, it retries the connection after the configured
   190  // retry duration. Otherwise, if required, it makes a new connection request.
   191  // After maxFailedConnectionAttempts new connections will be retried after the
   192  // configured retry duration.
   193  func (cm *ConnManager) handleFailedConn(c *ConnReq) {
   194  	if atomic.LoadInt32(&cm.stop) != 0 {
   195  		return
   196  	}
   197  	if c.Permanent {
   198  		c.retryCount++
   199  		d := time.Duration(c.retryCount) * cm.cfg.RetryDuration
   200  		if d > maxRetryDuration {
   201  			d = maxRetryDuration
   202  		}
   203  		log.Debugf("Retrying connection to %v in %v", c, d)
   204  		time.AfterFunc(d, func() {
   205  			cm.Connect(c)
   206  		})
   207  	} else if cm.cfg.GetNewAddress != nil {
   208  		cm.failedAttempts++
   209  		if cm.failedAttempts >= maxFailedAttempts {
   210  			log.Debugf("Max failed connection attempts reached: [%d] "+
   211  				"-- retrying connection in: %v", maxFailedAttempts,
   212  				cm.cfg.RetryDuration)
   213  			time.AfterFunc(cm.cfg.RetryDuration, func() {
   214  				cm.NewConnReq()
   215  			})
   216  		} else {
   217  			go cm.NewConnReq()
   218  		}
   219  	}
   220  }
   221  
   222  // connHandler handles all connection related requests.  It must be run as a
   223  // goroutine.
   224  //
   225  // The connection handler makes sure that we maintain a pool of active outbound
   226  // connections so that we remain connected to the network.  Connection requests
   227  // are processed and mapped by their assigned ids.
        //
        // All messages arrive on cm.requests and are handled one at a time, so the
        // pending and conns maps below are only ever touched by this goroutine.  The
        // loop exits when cm.quit is closed, after which cm.wg is released.
   228  func (cm *ConnManager) connHandler() {
   229  
   230  	var (
   231  		// pending holds all registered conn requests that have yet to
   232  		// succeed.
   233  		pending = make(map[uint64]*ConnReq)
   234  
   235  		// conns represents the set of all actively connected peers.
   236  		conns = make(map[uint64]*ConnReq, cm.cfg.TargetOutbound)
   237  	)
   238  
   239  out:
   240  	for {
   241  		select {
   242  		case req := <-cm.requests:
   243  			switch msg := req.(type) {
   244  
        			// A new request: track it as pending and unblock the
        			// sender, which waits on msg.done.
   245  			case registerPending:
   246  				connReq := msg.c
   247  				connReq.updateState(ConnPending)
   248  				pending[msg.c.id] = connReq
   249  				close(msg.done)
   250  
        			// A dial succeeded: promote the request from pending to
        			// connected.
   251  			case handleConnected:
   252  				connReq := msg.c
   253  
        				// A request that is no longer pending was canceled
        				// while the dial was in flight, so the fresh
        				// connection is closed and dropped.
   254  				if _, ok := pending[connReq.id]; !ok {
   255  					if msg.conn != nil {
   256  						msg.conn.Close()
   257  					}
   258  					log.Debugf("Ignoring connection for "+
   259  						"canceled connreq=%v", connReq)
   260  					continue
   261  				}
   262  
   263  				connReq.updateState(ConnEstablished)
   264  				connReq.conn = msg.conn
   265  				conns[connReq.id] = connReq
   266  				log.Debugf("Connected to %v", connReq)
        				// Success resets both the per-request backoff and
        				// the manager-wide failure counter.
   267  				connReq.retryCount = 0
   268  				cm.failedAttempts = 0
   269  
   270  				delete(pending, connReq.id)
   271  
   272  				if cm.cfg.OnConnection != nil {
   273  					go cm.cfg.OnConnection(connReq, msg.conn)
   274  				}
   275  
        			// Sent by Disconnect (retry=true) and Remove (retry=false).
   276  			case handleDisconnected:
   277  				connReq, ok := conns[msg.id]
   278  				if !ok {
   279  					connReq, ok = pending[msg.id]
   280  					if !ok {
   281  						log.Errorf("Unknown connid=%d",
   282  							msg.id)
   283  						continue
   284  					}
   285  
   286  					// Pending connection was found, remove
   287  					// it from pending map if we should
   288  					// ignore a later, successful
   289  					// connection.
   290  					connReq.updateState(ConnCanceled)
   291  					log.Debugf("Canceling: %v", connReq)
   292  					delete(pending, msg.id)
   293  					continue
   294  
   295  				}
   296  
   297  				// An existing connection was located, mark as
   298  				// disconnected and execute disconnection
   299  				// callback.
   300  				log.Debugf("Disconnected from %v", connReq)
   301  				delete(conns, msg.id)
   302  
   303  				if connReq.conn != nil {
   304  					connReq.conn.Close()
   305  				}
   306  
   307  				if cm.cfg.OnDisconnection != nil {
   308  					go cm.cfg.OnDisconnection(connReq)
   309  				}
   310  
   311  				// All internal state has been cleaned up, if
   312  				// this connection is being removed, we will
   313  				// make no further attempts with this request.
   314  				if !msg.retry {
   315  					connReq.updateState(ConnDisconnected)
   316  					continue
   317  				}
   318  
   319  				// Otherwise, we will attempt a reconnection if
   320  				// we do not have enough peers, or if this is a
   321  				// persistent peer. The connection request is
   322  				// re added to the pending map, so that
   323  				// subsequent processing of connections and
   324  				// failures do not ignore the request.
        				//
        				// NOTE(review): when neither condition holds the
        				// state is left unchanged (still ConnEstablished)
        				// even though the connection was closed above;
        				// confirm this is intended.
   325  				if uint32(len(conns)) < cm.cfg.TargetOutbound ||
   326  					connReq.Permanent {
   327  
   328  					connReq.updateState(ConnPending)
   329  					log.Debugf("Reconnecting to %v",
   330  						connReq)
   331  					pending[msg.id] = connReq
   332  					cm.handleFailedConn(connReq)
   333  				}
   334  
        			// An attempt failed: let handleFailedConn schedule the
        			// retry or start a replacement request.
   335  			case handleFailed:
   336  				connReq := msg.c
   337  
   338  				if _, ok := pending[connReq.id]; !ok {
   339  					log.Debugf("Ignoring connection for "+
   340  						"canceled conn req: %v", connReq)
   341  					continue
   342  				}
   343  
        				// NOTE(review): the request stays in the pending
        				// map.  For non-permanent requests
        				// handleFailedConn starts a brand new ConnReq, so
        				// this entry is only deleted if Disconnect or
        				// Remove is later called with its id; confirm
        				// whether that accumulation is intended.
   344  				connReq.updateState(ConnFailing)
   345  				log.Debugf("Failed to connect to %v: %v",
   346  					connReq, msg.err)
   347  				cm.handleFailedConn(connReq)
   348  			}
   349  
   350  		case <-cm.quit:
   351  			break out
   352  		}
   353  	}
   354  
   355  	cm.wg.Done()
   356  	log.Trace("Connection handler done")
   357  }
   358  
   359  // NewConnReq creates a new connection request and connects to the
   360  // corresponding address.
   361  func (cm *ConnManager) NewConnReq() {
   362  	if atomic.LoadInt32(&cm.stop) != 0 {
   363  		return
   364  	}
   365  	if cm.cfg.GetNewAddress == nil {
   366  		return
   367  	}
   368  
   369  	c := &ConnReq{}
   370  	atomic.StoreUint64(&c.id, atomic.AddUint64(&cm.connReqCount, 1))
   371  
   372  	// Submit a request of a pending connection attempt to the connection
   373  	// manager. By registering the id before the connection is even
   374  	// established, we'll be able to later cancel the connection via the
   375  	// Remove method.
   376  	done := make(chan struct{})
   377  	select {
   378  	case cm.requests <- registerPending{c, done}:
   379  	case <-cm.quit:
   380  		return
   381  	}
   382  
   383  	// Wait for the registration to successfully add the pending conn req to
   384  	// the conn manager's internal state.
   385  	select {
   386  	case <-done:
   387  	case <-cm.quit:
   388  		return
   389  	}
   390  
   391  	addr, err := cm.cfg.GetNewAddress()
   392  	if err != nil {
   393  		select {
   394  		case cm.requests <- handleFailed{c, err}:
   395  		case <-cm.quit:
   396  		}
   397  		return
   398  	}
   399  
   400  	c.Addr = addr
   401  
   402  	cm.Connect(c)
   403  }
   404  
   405  // Connect assigns an id and dials a connection to the address of the
   406  // connection request.
   407  func (cm *ConnManager) Connect(c *ConnReq) {
   408  	if atomic.LoadInt32(&cm.stop) != 0 {
   409  		return
   410  	}
   411  
   412  	// During the time we wait for retry there is a chance that
   413  	// this connection was already cancelled
   414  	if c.State() == ConnCanceled {
   415  		log.Debugf("Ignoring connect for canceled connreq=%v", c)
   416  		return
   417  	}
   418  
   419  	if atomic.LoadUint64(&c.id) == 0 {
   420  		atomic.StoreUint64(&c.id, atomic.AddUint64(&cm.connReqCount, 1))
   421  
   422  		// Submit a request of a pending connection attempt to the
   423  		// connection manager. By registering the id before the
   424  		// connection is even established, we'll be able to later
   425  		// cancel the connection via the Remove method.
   426  		done := make(chan struct{})
   427  		select {
   428  		case cm.requests <- registerPending{c, done}:
   429  		case <-cm.quit:
   430  			return
   431  		}
   432  
   433  		// Wait for the registration to successfully add the pending
   434  		// conn req to the conn manager's internal state.
   435  		select {
   436  		case <-done:
   437  		case <-cm.quit:
   438  			return
   439  		}
   440  	}
   441  
   442  	log.Debugf("Attempting to connect to %v", c)
   443  
   444  	conn, err := cm.cfg.Dial(c.Addr)
   445  	if err != nil {
   446  		select {
   447  		case cm.requests <- handleFailed{c, err}:
   448  		case <-cm.quit:
   449  		}
   450  		return
   451  	}
   452  
   453  	select {
   454  	case cm.requests <- handleConnected{c, conn}:
   455  	case <-cm.quit:
   456  	}
   457  }
   458  
   459  // Disconnect disconnects the connection corresponding to the given connection
   460  // id. If permanent, the connection will be retried with an increasing backoff
   461  // duration.
   462  func (cm *ConnManager) Disconnect(id uint64) {
   463  	if atomic.LoadInt32(&cm.stop) != 0 {
   464  		return
   465  	}
   466  
   467  	select {
   468  	case cm.requests <- handleDisconnected{id, true}:
   469  	case <-cm.quit:
   470  	}
   471  }
   472  
   473  // Remove removes the connection corresponding to the given connection id from
   474  // known connections.
   475  //
   476  // NOTE: This method can also be used to cancel a lingering connection attempt
   477  // that hasn't yet succeeded.
   478  func (cm *ConnManager) Remove(id uint64) {
   479  	if atomic.LoadInt32(&cm.stop) != 0 {
   480  		return
   481  	}
   482  
   483  	select {
   484  	case cm.requests <- handleDisconnected{id, false}:
   485  	case <-cm.quit:
   486  	}
   487  }
   488  
   489  // listenHandler accepts incoming connections on a given listener.  It must be
   490  // run as a goroutine.
   491  func (cm *ConnManager) listenHandler(listener net.Listener) {
   492  	log.Infof("Server listening on %s", listener.Addr())
   493  	for atomic.LoadInt32(&cm.stop) == 0 {
   494  		conn, err := listener.Accept()
   495  		if err != nil {
   496  			// Only log the error if not forcibly shutting down.
   497  			if atomic.LoadInt32(&cm.stop) == 0 {
   498  				log.Errorf("Can't accept connection: %v", err)
   499  			}
   500  			continue
   501  		}
   502  		go cm.cfg.OnAccept(conn)
   503  	}
   504  
   505  	cm.wg.Done()
   506  	log.Tracef("Listener handler done for %s", listener.Addr())
   507  }
   508  
   509  // Start launches the connection manager and begins connecting to the network.
   510  func (cm *ConnManager) Start() {
   511  	// Already started?
   512  	if atomic.AddInt32(&cm.start, 1) != 1 {
   513  		return
   514  	}
   515  
   516  	log.Trace("Connection manager started")
   517  	cm.wg.Add(1)
   518  	go cm.connHandler()
   519  
   520  	// Start all the listeners so long as the caller requested them and
   521  	// provided a callback to be invoked when connections are accepted.
   522  	if cm.cfg.OnAccept != nil {
   523  		for _, listner := range cm.cfg.Listeners {
   524  			cm.wg.Add(1)
   525  			go cm.listenHandler(listner)
   526  		}
   527  	}
   528  
   529  	for i := atomic.LoadUint64(&cm.connReqCount); i < uint64(cm.cfg.TargetOutbound); i++ {
   530  		go cm.NewConnReq()
   531  	}
   532  }
   533  
   534  // Wait blocks until the connection manager halts gracefully.
   535  func (cm *ConnManager) Wait() {
   536  	cm.wg.Wait()
   537  }
   538  
   539  // Stop gracefully shuts down the connection manager.
   540  func (cm *ConnManager) Stop() {
   541  	if atomic.AddInt32(&cm.stop, 1) != 1 {
   542  		log.Warnf("Connection manager already stopped")
   543  		return
   544  	}
   545  
   546  	// Stop all the listeners.  There will not be any listeners if
   547  	// listening is disabled.
   548  	for _, listener := range cm.cfg.Listeners {
   549  		// Ignore the error since this is shutdown and there is no way
   550  		// to recover anyways.
   551  		_ = listener.Close()
   552  	}
   553  
   554  	close(cm.quit)
   555  	log.Trace("Connection manager stopped")
   556  }
   557  
   558  // New returns a new connection manager.
   559  // Use Start to start connecting to the network.
   560  func New(cfg *Config) (*ConnManager, error) {
   561  	if cfg.Dial == nil {
   562  		return nil, ErrDialNil
   563  	}
   564  	// Default to sane values
   565  	if cfg.RetryDuration <= 0 {
   566  		cfg.RetryDuration = defaultRetryDuration
   567  	}
   568  	if cfg.TargetOutbound == 0 {
   569  		cfg.TargetOutbound = defaultTargetOutbound
   570  	}
   571  	cm := ConnManager{
   572  		cfg:      *cfg, // Copy so caller can't mutate
   573  		requests: make(chan interface{}),
   574  		quit:     make(chan struct{}),
   575  	}
   576  	return &cm, nil
   577  }