github.com/yggdrasil-network/yggdrasil-go@v0.5.6/src/core/link.go (about)

     1  package core
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/hex"
     7  	"fmt"
     8  	"io"
     9  	"net"
    10  	"net/netip"
    11  	"net/url"
    12  	"strconv"
    13  	"strings"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"github.com/Arceliar/phony"
    18  	"github.com/yggdrasil-network/yggdrasil-go/src/address"
    19  	"golang.org/x/crypto/blake2b"
    20  )
    21  
// linkType classifies a link by how it came to exist. In this file only
// linkTypePersistent links are retried with backoff by add; linkTypeIncoming
// is the type used for connections accepted by listen.
type linkType int

const (
	linkTypePersistent linkType = iota // Statically configured
	linkTypeEphemeral                  // Multicast discovered
	linkTypeIncoming                   // Incoming connection
)
    29  
// defaultBackoffLimit is the maximum redial backoff that add applies when the
// peering URI carries no "maxbackoff" query option.
const defaultBackoffLimit = time.Second << 12 // 1h8m16s

// minimumBackoffLimit is the smallest "maxbackoff" value that add accepts;
// anything shorter is rejected with ErrLinkMaxBackoffInvalid.
const minimumBackoffLimit = time.Second * 30
    32  
// links is the actor that owns the table of peerings and the per-protocol
// dialers/listeners. It embeds phony.Inbox, and the functions in this file
// touch _links only from inside phony.Block(l, ...).
type links struct {
	phony.Inbox
	core  *Core
	tcp   *linkTCP   // TCP interface support
	tls   *linkTLS   // TLS interface support
	unix  *linkUNIX  // UNIX interface support
	socks *linkSOCKS // SOCKS interface support
	quic  *linkQUIC  // QUIC interface support
	// _links can only be modified safely from within the links actor
	_links map[linkInfo]*link // *link is nil if connection in progress
}
    44  
// linkProtocol is the interface through which connect and listen drive a
// transport. connect selects an implementation by URL scheme and calls dial;
// listen does the same and calls listen.
type linkProtocol interface {
	// dial opens an outbound connection to url. info and options are the link
	// key and the parsed link options that add built for this peering.
	dial(ctx context.Context, url *url.URL, info linkInfo, options linkOptions) (net.Conn, error)
	// listen starts accepting connections for url; sintf is the source
	// interface name passed through from the caller.
	listen(ctx context.Context, url *url.URL, sintf string) (net.Listener, error)
}
    49  
// linkInfo is used as a map key for links._links. The uri field is produced
// from urlForLinkInfo, so it carries no query string.
type linkInfo struct {
	uri   string // Peering URI in complete form
	sintf string // Peering source interface (i.e. from InterfacePeers)
}
    55  
// link tracks the state of a connection, either persistent or non-persistent.
// The fields without an underscore prefix are set when the link is created;
// the underscore-prefixed fields are updated as connections come and go.
type link struct {
	ctx       context.Context    // Connection context
	cancel    context.CancelFunc // Stop future redial attempts (when peer removed)
	kick      chan struct{}      // Attempt to reconnect now, if backing off
	linkType  linkType           // Type of link, i.e. outbound/inbound, persistent/ephemeral
	linkProto string             // Protocol carrier of link, e.g. TCP, AWDL
	// The remaining fields can only be modified safely from within the links actor
	_conn    *linkConn // Connected link, if any, nil if not connected
	_err     error     // Last error on the connection, if any
	_errtime time.Time // Last time an error occurred
}
    68  
// linkOptions holds the protocol-independent options parsed from the query
// string of a peering or listener URI.
type linkOptions struct {
	pinnedEd25519Keys map[keyArray]struct{} // From "key" options; when non-empty, handler rejects peers whose key is not listed
	priority          uint8                 // From the "priority" option; sent in the handshake metadata
	tlsSNI            string                // From the "sni" option, or the URI host when that is not an IP literal
	password          []byte                // From the "password" option; passed to the handshake encode/decode
	maxBackoff        time.Duration         // Upper bound on the redial delay; defaults to defaultBackoffLimit in add
}
    76  
    77  type Listener struct {
    78  	listener net.Listener
    79  	ctx      context.Context
    80  	Cancel   context.CancelFunc
    81  }
    82  
    83  func (l *Listener) Addr() net.Addr {
    84  	return l.listener.Addr()
    85  }
    86  
    87  func (l *Listener) Close() error {
    88  	l.Cancel()
    89  	err := l.listener.Close()
    90  	<-l.ctx.Done()
    91  	return err
    92  }
    93  
    94  func (l *links) init(c *Core) error {
    95  	l.core = c
    96  	l.tcp = l.newLinkTCP()
    97  	l.tls = l.newLinkTLS(l.tcp)
    98  	l.unix = l.newLinkUNIX()
    99  	l.socks = l.newLinkSOCKS()
   100  	l.quic = l.newLinkQUIC()
   101  	l._links = make(map[linkInfo]*link)
   102  
   103  	var listeners []ListenAddress
   104  	phony.Block(c, func() {
   105  		listeners = make([]ListenAddress, 0, len(c.config._listeners))
   106  		for listener := range c.config._listeners {
   107  			listeners = append(listeners, listener)
   108  		}
   109  	})
   110  
   111  	return nil
   112  }
   113  
   114  func (l *links) shutdown() {
   115  	phony.Block(l.tcp, func() {
   116  		for l := range l.tcp._listeners {
   117  			_ = l.Close()
   118  		}
   119  	})
   120  	phony.Block(l.tls, func() {
   121  		for l := range l.tls._listeners {
   122  			_ = l.Close()
   123  		}
   124  	})
   125  	phony.Block(l.unix, func() {
   126  		for l := range l.unix._listeners {
   127  			_ = l.Close()
   128  		}
   129  	})
   130  }
   131  
   132  type linkError string
   133  
   134  func (e linkError) Error() string { return string(e) }
   135  
   136  const ErrLinkAlreadyConfigured = linkError("peer is already configured")
   137  const ErrLinkNotConfigured = linkError("peer is not configured")
   138  const ErrLinkPriorityInvalid = linkError("priority value is invalid")
   139  const ErrLinkPinnedKeyInvalid = linkError("pinned public key is invalid")
   140  const ErrLinkPasswordInvalid = linkError("password is invalid")
   141  const ErrLinkUnrecognisedSchema = linkError("link schema unknown")
   142  const ErrLinkMaxBackoffInvalid = linkError("max backoff duration invalid")
   143  
   144  func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
   145  	var retErr error
   146  	phony.Block(l, func() {
   147  		// Generate the link info and see whether we think we already
   148  		// have an open peering to this peer.
   149  		lu := urlForLinkInfo(*u)
   150  		info := linkInfo{
   151  			uri:   lu.String(),
   152  			sintf: sintf,
   153  		}
   154  
   155  		// Collect together the link options, these are global options
   156  		// that are not specific to any given protocol.
   157  		options := linkOptions{
   158  			maxBackoff: defaultBackoffLimit,
   159  		}
   160  		for _, pubkey := range u.Query()["key"] {
   161  			sigPub, err := hex.DecodeString(pubkey)
   162  			if err != nil {
   163  				retErr = ErrLinkPinnedKeyInvalid
   164  				return
   165  			}
   166  			var sigPubKey keyArray
   167  			copy(sigPubKey[:], sigPub)
   168  			if options.pinnedEd25519Keys == nil {
   169  				options.pinnedEd25519Keys = map[keyArray]struct{}{}
   170  			}
   171  			options.pinnedEd25519Keys[sigPubKey] = struct{}{}
   172  		}
   173  		if p := u.Query().Get("priority"); p != "" {
   174  			pi, err := strconv.ParseUint(p, 10, 8)
   175  			if err != nil {
   176  				retErr = ErrLinkPriorityInvalid
   177  				return
   178  			}
   179  			options.priority = uint8(pi)
   180  		}
   181  		if p := u.Query().Get("password"); p != "" {
   182  			if len(p) > blake2b.Size {
   183  				retErr = ErrLinkPasswordInvalid
   184  				return
   185  			}
   186  			options.password = []byte(p)
   187  		}
   188  		if p := u.Query().Get("maxbackoff"); p != "" {
   189  			d, err := time.ParseDuration(p)
   190  			if err != nil || d < minimumBackoffLimit {
   191  				retErr = ErrLinkMaxBackoffInvalid
   192  				return
   193  			}
   194  			options.maxBackoff = d
   195  		}
   196  		// SNI headers must contain hostnames and not IP addresses, so we must make sure
   197  		// that we do not populate the SNI with an IP literal. We do this by splitting
   198  		// the host-port combo from the query option and then seeing if it parses to an
   199  		// IP address successfully or not.
   200  		if sni := u.Query().Get("sni"); sni != "" {
   201  			if net.ParseIP(sni) == nil {
   202  				options.tlsSNI = sni
   203  			}
   204  		}
   205  		// If the SNI is not configured still because the above failed then we'll try
   206  		// again but this time we'll use the host part of the peering URI instead.
   207  		if options.tlsSNI == "" {
   208  			if host, _, err := net.SplitHostPort(u.Host); err == nil && net.ParseIP(host) == nil {
   209  				options.tlsSNI = host
   210  			}
   211  		}
   212  
   213  		// If we think we're already connected to this peer, load up
   214  		// the existing peer state. Try to kick the peer if possible,
   215  		// which will cause an immediate connection attempt if it is
   216  		// backing off for some reason.
   217  		state, ok := l._links[info]
   218  		if ok && state != nil {
   219  			select {
   220  			case state.kick <- struct{}{}:
   221  			default:
   222  			}
   223  			retErr = ErrLinkAlreadyConfigured
   224  			return
   225  		}
   226  
   227  		// Create the link entry. This will contain the connection
   228  		// in progress (if any), any error details and a context that
   229  		// lets the link be cancelled later.
   230  		state = &link{
   231  			linkType:  linkType,
   232  			linkProto: strings.ToUpper(u.Scheme),
   233  			kick:      make(chan struct{}),
   234  		}
   235  		state.ctx, state.cancel = context.WithCancel(l.core.ctx)
   236  
   237  		// Store the state of the link so that it can be queried later.
   238  		l._links[info] = state
   239  
   240  		// Track how many consecutive connection failures we have had,
   241  		// as we will back off exponentially rather than hammering the
   242  		// remote node endlessly.
   243  		var backoff int
   244  
   245  		// backoffNow is called when there's a connection error. It
   246  		// will wait for the specified amount of time and then return
   247  		// true, unless the peering context was cancelled (due to a
   248  		// peer removal most likely), in which case it returns false.
   249  		// The caller should check the return value to decide whether
   250  		// or not to give up trying.
   251  		backoffNow := func() bool {
   252  			if backoff < 32 {
   253  				backoff++
   254  			}
   255  			duration := time.Second << backoff
   256  			if duration > options.maxBackoff {
   257  				duration = options.maxBackoff
   258  			}
   259  			select {
   260  			case <-state.kick:
   261  				return true
   262  			case <-state.ctx.Done():
   263  				return false
   264  			case <-l.core.ctx.Done():
   265  				return false
   266  			case <-time.After(duration):
   267  				return true
   268  			}
   269  		}
   270  
   271  		// resetBackoff is called by the connection handler when the
   272  		// handshake has successfully completed.
   273  		resetBackoff := func() {
   274  			backoff = 0
   275  		}
   276  
   277  		// The goroutine is responsible for attempting the connection
   278  		// and then running the handler. If the connection is persistent
   279  		// then the loop will run endlessly, using backoffs as needed.
   280  		// Otherwise the loop will end, cleaning up the link entry.
   281  		go func() {
   282  			defer phony.Block(l, func() {
   283  				if l._links[info] == state {
   284  					delete(l._links, info)
   285  				}
   286  			})
   287  
   288  			// This loop will run each and every time we want to attempt
   289  			// a connection to this peer.
   290  			// TODO get rid of this loop, this is *exactly* what time.AfterFunc is for, we should just send a signal to the links actor to kick off a goroutine as needed
   291  			for {
   292  				select {
   293  				case <-state.ctx.Done():
   294  					// The peering context has been cancelled, so don't try
   295  					// to dial again.
   296  					return
   297  				default:
   298  				}
   299  
   300  				conn, err := l.connect(state.ctx, u, info, options)
   301  				if err != nil || conn == nil {
   302  					if err == nil && conn == nil {
   303  						l.core.log.Warnf("Link %q reached inconsistent error state", u.String())
   304  					}
   305  					if linkType == linkTypePersistent {
   306  						// If the link is a persistent configured peering,
   307  						// store information about the connection error so
   308  						// that we can report it through the admin socket.
   309  						phony.Block(l, func() {
   310  							state._conn = nil
   311  							state._err = err
   312  							state._errtime = time.Now()
   313  						})
   314  
   315  						// Back off for a bit. If true is returned here, we
   316  						// can continue onto the next loop iteration to try
   317  						// the next connection.
   318  						if backoffNow() {
   319  							continue
   320  						}
   321  						return
   322  					}
   323  					// Ephemeral and incoming connections don't remain
   324  					// after a connection failure, so exit out of the
   325  					// loop and clean up the link entry.
   326  					break
   327  				}
   328  
   329  				// The linkConn wrapper allows us to track the number of
   330  				// bytes written to and read from this connection without
   331  				// the help of ironwood.
   332  				lc := &linkConn{
   333  					Conn: conn,
   334  					up:   time.Now(),
   335  				}
   336  
   337  				// Update the link state with our newly wrapped connection.
   338  				// Clear the error state.
   339  				var doRet bool
   340  				phony.Block(l, func() {
   341  					if state._conn != nil {
   342  						// If a peering has come up in this time, abort this one.
   343  						doRet = true
   344  					}
   345  					state._conn = lc
   346  				})
   347  				if doRet {
   348  					return
   349  				}
   350  
   351  				// Give the connection to the handler. The handler will block
   352  				// for the lifetime of the connection.
   353  				if err = l.handler(linkType, options, lc, resetBackoff); err != nil && err != io.EOF {
   354  					l.core.log.Debugf("Link %s error: %s\n", info.uri, err)
   355  				}
   356  
   357  				// The handler has stopped running so the connection is dead,
   358  				// try to close the underlying socket just in case and then
   359  				// update the link state.
   360  				_ = lc.Close()
   361  				phony.Block(l, func() {
   362  					state._conn = nil
   363  					if state._err = err; state._err != nil {
   364  						state._errtime = time.Now()
   365  					}
   366  				})
   367  
   368  				// If the link is persistently configured, back off if needed
   369  				// and then try reconnecting. Otherwise, exit out.
   370  				if linkType == linkTypePersistent {
   371  					if backoffNow() {
   372  						continue
   373  					}
   374  					return
   375  				}
   376  			}
   377  		}()
   378  	})
   379  	return retErr
   380  }
   381  
   382  func (l *links) remove(u *url.URL, sintf string, linkType linkType) error {
   383  	var retErr error
   384  	phony.Block(l, func() {
   385  		// Generate the link info and see whether we think we already
   386  		// have an open peering to this peer.
   387  		lu := urlForLinkInfo(*u)
   388  		info := linkInfo{
   389  			uri:   lu.String(),
   390  			sintf: sintf,
   391  		}
   392  
   393  		// If this peer is already configured then we will close the
   394  		// connection and stop it from retrying.
   395  		state, ok := l._links[info]
   396  		if ok && state != nil {
   397  			state.cancel()
   398  			if conn := state._conn; conn != nil {
   399  				retErr = conn.Close()
   400  			}
   401  			return
   402  		}
   403  
   404  		retErr = ErrLinkNotConfigured
   405  	})
   406  	return retErr
   407  }
   408  
   409  func (l *links) listen(u *url.URL, sintf string) (*Listener, error) {
   410  	ctx, cancel := context.WithCancel(l.core.ctx)
   411  	var protocol linkProtocol
   412  	switch strings.ToLower(u.Scheme) {
   413  	case "tcp":
   414  		protocol = l.tcp
   415  	case "tls":
   416  		protocol = l.tls
   417  	case "unix":
   418  		protocol = l.unix
   419  	case "quic":
   420  		protocol = l.quic
   421  	default:
   422  		cancel()
   423  		return nil, ErrLinkUnrecognisedSchema
   424  	}
   425  	listener, err := protocol.listen(ctx, u, sintf)
   426  	if err != nil {
   427  		cancel()
   428  		return nil, err
   429  	}
   430  	li := &Listener{
   431  		listener: listener,
   432  		ctx:      ctx,
   433  		Cancel:   cancel,
   434  	}
   435  
   436  	var options linkOptions
   437  	if p := u.Query().Get("priority"); p != "" {
   438  		pi, err := strconv.ParseUint(p, 10, 8)
   439  		if err != nil {
   440  			return nil, ErrLinkPriorityInvalid
   441  		}
   442  		options.priority = uint8(pi)
   443  	}
   444  	if p := u.Query().Get("password"); p != "" {
   445  		if len(p) > blake2b.Size {
   446  			return nil, ErrLinkPasswordInvalid
   447  		}
   448  		options.password = []byte(p)
   449  	}
   450  
   451  	go func() {
   452  		l.core.log.Infof("%s listener started on %s", strings.ToUpper(u.Scheme), listener.Addr())
   453  		defer l.core.log.Infof("%s listener stopped on %s", strings.ToUpper(u.Scheme), listener.Addr())
   454  		for {
   455  			conn, err := listener.Accept()
   456  			if err != nil {
   457  				return
   458  			}
   459  			go func(conn net.Conn) {
   460  				defer conn.Close()
   461  
   462  				// In order to populate a somewhat sane looking connection
   463  				// URI in the admin socket, we need to replace the host in
   464  				// the listener URL with the remote address.
   465  				pu := *u
   466  				pu.Host = conn.RemoteAddr().String()
   467  				lu := urlForLinkInfo(pu)
   468  				info := linkInfo{
   469  					uri:   lu.String(),
   470  					sintf: sintf,
   471  				}
   472  
   473  				// If there's an existing link state for this link, get it.
   474  				// If this node is already connected to us, just drop the
   475  				// connection. This prevents duplicate peerings.
   476  				var lc *linkConn
   477  				var state *link
   478  				phony.Block(l, func() {
   479  					var ok bool
   480  					state, ok = l._links[info]
   481  					if !ok || state == nil {
   482  						state = &link{
   483  							linkType:  linkTypeIncoming,
   484  							linkProto: strings.ToUpper(u.Scheme),
   485  							kick:      make(chan struct{}),
   486  						}
   487  					}
   488  					if state._conn != nil {
   489  						// If a connection has come up in this time, abort
   490  						// this one.
   491  						return
   492  					}
   493  
   494  					// The linkConn wrapper allows us to track the number of
   495  					// bytes written to and read from this connection without
   496  					// the help of ironwood.
   497  					lc = &linkConn{
   498  						Conn: conn,
   499  						up:   time.Now(),
   500  					}
   501  
   502  					// Update the link state with our newly wrapped connection.
   503  					// Clear the error state.
   504  					state._conn = lc
   505  					state._err = nil
   506  					state._errtime = time.Time{}
   507  
   508  					// Store the state of the link so that it can be queried later.
   509  					l._links[info] = state
   510  				})
   511  				if lc == nil {
   512  					return
   513  				}
   514  
   515  				// Give the connection to the handler. The handler will block
   516  				// for the lifetime of the connection.
   517  				if err = l.handler(linkTypeIncoming, options, lc, nil); err != nil && err != io.EOF {
   518  					l.core.log.Debugf("Link %s error: %s\n", u.Host, err)
   519  				}
   520  
   521  				// The handler has stopped running so the connection is dead,
   522  				// try to close the underlying socket just in case and then
   523  				// drop the link state.
   524  				_ = lc.Close()
   525  				phony.Block(l, func() {
   526  					if l._links[info] == state {
   527  						delete(l._links, info)
   528  					}
   529  				})
   530  			}(conn)
   531  		}
   532  	}()
   533  	return li, nil
   534  }
   535  
   536  func (l *links) connect(ctx context.Context, u *url.URL, info linkInfo, options linkOptions) (net.Conn, error) {
   537  	var dialer linkProtocol
   538  	switch strings.ToLower(u.Scheme) {
   539  	case "tcp":
   540  		dialer = l.tcp
   541  	case "tls":
   542  		dialer = l.tls
   543  	case "socks", "sockstls":
   544  		dialer = l.socks
   545  	case "unix":
   546  		dialer = l.unix
   547  	case "quic":
   548  		dialer = l.quic
   549  	default:
   550  		return nil, ErrLinkUnrecognisedSchema
   551  	}
   552  	return dialer.dial(ctx, u, info, options)
   553  }
   554  
   555  func (l *links) handler(linkType linkType, options linkOptions, conn net.Conn, success func()) error {
   556  	meta := version_getBaseMetadata()
   557  	meta.publicKey = l.core.public
   558  	meta.priority = options.priority
   559  	metaBytes, err := meta.encode(l.core.secret, options.password)
   560  	if err != nil {
   561  		return fmt.Errorf("failed to generate handshake: %w", err)
   562  	}
   563  	if err := conn.SetDeadline(time.Now().Add(time.Second * 6)); err != nil {
   564  		return fmt.Errorf("failed to set handshake deadline: %w", err)
   565  	}
   566  	n, err := conn.Write(metaBytes)
   567  	switch {
   568  	case err != nil:
   569  		return fmt.Errorf("write handshake: %w", err)
   570  	case err == nil && n != len(metaBytes):
   571  		return fmt.Errorf("incomplete handshake send")
   572  	}
   573  	meta = version_metadata{}
   574  	base := version_getBaseMetadata()
   575  	if err := meta.decode(conn, options.password); err != nil {
   576  		_ = conn.Close()
   577  		return err
   578  	}
   579  	if !meta.check() {
   580  		return fmt.Errorf("remote node incompatible version (local %s, remote %s)",
   581  			fmt.Sprintf("%d.%d", base.majorVer, base.minorVer),
   582  			fmt.Sprintf("%d.%d", meta.majorVer, meta.minorVer),
   583  		)
   584  	}
   585  	if err = conn.SetDeadline(time.Time{}); err != nil {
   586  		return fmt.Errorf("failed to clear handshake deadline: %w", err)
   587  	}
   588  	// Check if the remote side matches the keys we expected. This is a bit of a weak
   589  	// check - in future versions we really should check a signature or something like that.
   590  	if pinned := options.pinnedEd25519Keys; len(pinned) > 0 {
   591  		var key keyArray
   592  		copy(key[:], meta.publicKey)
   593  		if _, allowed := pinned[key]; !allowed {
   594  			return fmt.Errorf("node public key that does not match pinned keys")
   595  		}
   596  	}
   597  	// Check if we're authorized to connect to this key / IP
   598  	var allowed map[[32]byte]struct{}
   599  	phony.Block(l.core, func() {
   600  		allowed = l.core.config._allowedPublicKeys
   601  	})
   602  	isallowed := len(allowed) == 0
   603  	for k := range allowed {
   604  		if bytes.Equal(k[:], meta.publicKey) {
   605  			isallowed = true
   606  			break
   607  		}
   608  	}
   609  	if linkType == linkTypeIncoming && !isallowed {
   610  		return fmt.Errorf("node public key %q is not in AllowedPublicKeys", hex.EncodeToString(meta.publicKey))
   611  	}
   612  
   613  	dir := "outbound"
   614  	if linkType == linkTypeIncoming {
   615  		dir = "inbound"
   616  	}
   617  	remoteAddr := net.IP(address.AddrForKey(meta.publicKey)[:]).String()
   618  	remoteStr := fmt.Sprintf("%s@%s", remoteAddr, conn.RemoteAddr())
   619  	localStr := conn.LocalAddr()
   620  	priority := options.priority
   621  	if meta.priority > priority {
   622  		priority = meta.priority
   623  	}
   624  	l.core.log.Infof("Connected %s: %s, source %s",
   625  		dir, remoteStr, localStr)
   626  	if success != nil {
   627  		success()
   628  	}
   629  
   630  	err = l.core.HandleConn(meta.publicKey, conn, priority)
   631  	switch err {
   632  	case io.EOF, net.ErrClosed, nil:
   633  		l.core.log.Infof("Disconnected %s: %s, source %s",
   634  			dir, remoteStr, localStr)
   635  	default:
   636  		l.core.log.Infof("Disconnected %s: %s, source %s; error: %s",
   637  			dir, remoteStr, localStr, err)
   638  	}
   639  	return nil
   640  }
   641  
   642  func urlForLinkInfo(u url.URL) url.URL {
   643  	u.RawQuery = ""
   644  	if host, _, err := net.SplitHostPort(u.Host); err == nil {
   645  		if addr, err := netip.ParseAddr(host); err == nil {
   646  			// For peers that look like multicast peers (i.e.
   647  			// link-local addresses), we will ignore the port number,
   648  			// otherwise we might open multiple connections to them.
   649  			if addr.IsLinkLocalUnicast() {
   650  				u.Host = fmt.Sprintf("[%s]", addr.String())
   651  			}
   652  		}
   653  	}
   654  	return u
   655  }
   656  
   657  type linkConn struct {
   658  	// tx and rx are at the beginning of the struct to ensure 64-bit alignment
   659  	// on 32-bit platforms, see https://pkg.go.dev/sync/atomic#pkg-note-BUG
   660  	rx uint64
   661  	tx uint64
   662  	up time.Time
   663  	net.Conn
   664  }
   665  
   666  func (c *linkConn) Read(p []byte) (n int, err error) {
   667  	n, err = c.Conn.Read(p)
   668  	atomic.AddUint64(&c.rx, uint64(n))
   669  	return
   670  }
   671  
   672  func (c *linkConn) Write(p []byte) (n int, err error) {
   673  	n, err = c.Conn.Write(p)
   674  	atomic.AddUint64(&c.tx, uint64(n))
   675  	return
   676  }