github.com/yggdrasil-network/yggdrasil-go@v0.5.6/src/core/link.go (about) 1 package core 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/hex" 7 "fmt" 8 "io" 9 "net" 10 "net/netip" 11 "net/url" 12 "strconv" 13 "strings" 14 "sync/atomic" 15 "time" 16 17 "github.com/Arceliar/phony" 18 "github.com/yggdrasil-network/yggdrasil-go/src/address" 19 "golang.org/x/crypto/blake2b" 20 ) 21 22 type linkType int 23 24 const ( 25 linkTypePersistent linkType = iota // Statically configured 26 linkTypeEphemeral // Multicast discovered 27 linkTypeIncoming // Incoming connection 28 ) 29 30 const defaultBackoffLimit = time.Second << 12 // 1h8m16s 31 const minimumBackoffLimit = time.Second * 30 32 33 type links struct { 34 phony.Inbox 35 core *Core 36 tcp *linkTCP // TCP interface support 37 tls *linkTLS // TLS interface support 38 unix *linkUNIX // UNIX interface support 39 socks *linkSOCKS // SOCKS interface support 40 quic *linkQUIC // QUIC interface support 41 // _links can only be modified safely from within the links actor 42 _links map[linkInfo]*link // *link is nil if connection in progress 43 } 44 45 type linkProtocol interface { 46 dial(ctx context.Context, url *url.URL, info linkInfo, options linkOptions) (net.Conn, error) 47 listen(ctx context.Context, url *url.URL, sintf string) (net.Listener, error) 48 } 49 50 // linkInfo is used as a map key 51 type linkInfo struct { 52 uri string // Peering URI in complete form 53 sintf string // Peering source interface (i.e. from InterfacePeers) 54 } 55 56 // link tracks the state of a connection, either persistent or non-persistent 57 type link struct { 58 ctx context.Context // Connection context 59 cancel context.CancelFunc // Stop future redial attempts (when peer removed) 60 kick chan struct{} // Attempt to reconnect now, if backing off 61 linkType linkType // Type of link, i.e. outbound/inbound, persistent/ephemeral 62 linkProto string // Protocol carrier of link, e.g. TCP, AWDL 63 // The remaining fields can only be modified safely from within the links actor 64 _conn *linkConn // Connected link, if any, nil if not connected 65 _err error // Last error on the connection, if any 66 _errtime time.Time // Last time an error occurred 67 } 68 69 type linkOptions struct { 70 pinnedEd25519Keys map[keyArray]struct{} 71 priority uint8 72 tlsSNI string 73 password []byte 74 maxBackoff time.Duration 75 } 76 77 type Listener struct { 78 listener net.Listener 79 ctx context.Context 80 Cancel context.CancelFunc 81 } 82 83 func (l *Listener) Addr() net.Addr { 84 return l.listener.Addr() 85 } 86 87 func (l *Listener) Close() error { 88 l.Cancel() 89 err := l.listener.Close() 90 <-l.ctx.Done() 91 return err 92 } 93 94 func (l *links) init(c *Core) error { 95 l.core = c 96 l.tcp = l.newLinkTCP() 97 l.tls = l.newLinkTLS(l.tcp) 98 l.unix = l.newLinkUNIX() 99 l.socks = l.newLinkSOCKS() 100 l.quic = l.newLinkQUIC() 101 l._links = make(map[linkInfo]*link) 102 103 var listeners []ListenAddress 104 phony.Block(c, func() { 105 listeners = make([]ListenAddress, 0, len(c.config._listeners)) 106 for listener := range c.config._listeners { 107 listeners = append(listeners, listener) 108 } 109 }) 110 111 return nil 112 } 113 114 func (l *links) shutdown() { 115 phony.Block(l.tcp, func() { 116 for l := range l.tcp._listeners { 117 _ = l.Close() 118 } 119 }) 120 phony.Block(l.tls, func() { 121 for l := range l.tls._listeners { 122 _ = l.Close() 123 } 124 }) 125 phony.Block(l.unix, func() { 126 for l := range l.unix._listeners { 127 _ = l.Close() 128 } 129 }) 130 } 131 132 type linkError string 133 134 func (e linkError) Error() string { return string(e) } 135 136 const ErrLinkAlreadyConfigured = linkError("peer is already configured") 137 const ErrLinkNotConfigured = linkError("peer is not configured") 138 const ErrLinkPriorityInvalid = linkError("priority value is invalid") 139 const ErrLinkPinnedKeyInvalid = linkError("pinned public key is invalid") 140 const ErrLinkPasswordInvalid = linkError("password is invalid") 141 const ErrLinkUnrecognisedSchema = linkError("link schema unknown") 142 const ErrLinkMaxBackoffInvalid = linkError("max backoff duration invalid") 143 144 func (l *links) add(u *url.URL, sintf string, linkType linkType) error { 145 var retErr error 146 phony.Block(l, func() { 147 // Generate the link info and see whether we think we already 148 // have an open peering to this peer. 149 lu := urlForLinkInfo(*u) 150 info := linkInfo{ 151 uri: lu.String(), 152 sintf: sintf, 153 } 154 155 // Collect together the link options, these are global options 156 // that are not specific to any given protocol. 157 options := linkOptions{ 158 maxBackoff: defaultBackoffLimit, 159 } 160 for _, pubkey := range u.Query()["key"] { 161 sigPub, err := hex.DecodeString(pubkey) 162 if err != nil { 163 retErr = ErrLinkPinnedKeyInvalid 164 return 165 } 166 var sigPubKey keyArray 167 copy(sigPubKey[:], sigPub) 168 if options.pinnedEd25519Keys == nil { 169 options.pinnedEd25519Keys = map[keyArray]struct{}{} 170 } 171 options.pinnedEd25519Keys[sigPubKey] = struct{}{} 172 } 173 if p := u.Query().Get("priority"); p != "" { 174 pi, err := strconv.ParseUint(p, 10, 8) 175 if err != nil { 176 retErr = ErrLinkPriorityInvalid 177 return 178 } 179 options.priority = uint8(pi) 180 } 181 if p := u.Query().Get("password"); p != "" { 182 if len(p) > blake2b.Size { 183 retErr = ErrLinkPasswordInvalid 184 return 185 } 186 options.password = []byte(p) 187 } 188 if p := u.Query().Get("maxbackoff"); p != "" { 189 d, err := time.ParseDuration(p) 190 if err != nil || d < minimumBackoffLimit { 191 retErr = ErrLinkMaxBackoffInvalid 192 return 193 } 194 options.maxBackoff = d 195 } 196 // SNI headers must contain hostnames and not IP addresses, so we must make sure 197 // that we do not populate the SNI with an IP literal. We do this by splitting 198 // the host-port combo from the query option and then seeing if it parses to an 199 // IP address successfully or not. 200 if sni := u.Query().Get("sni"); sni != "" { 201 if net.ParseIP(sni) == nil { 202 options.tlsSNI = sni 203 } 204 } 205 // If the SNI is not configured still because the above failed then we'll try 206 // again but this time we'll use the host part of the peering URI instead. 207 if options.tlsSNI == "" { 208 if host, _, err := net.SplitHostPort(u.Host); err == nil && net.ParseIP(host) == nil { 209 options.tlsSNI = host 210 } 211 } 212 213 // If we think we're already connected to this peer, load up 214 // the existing peer state. Try to kick the peer if possible, 215 // which will cause an immediate connection attempt if it is 216 // backing off for some reason. 217 state, ok := l._links[info] 218 if ok && state != nil { 219 select { 220 case state.kick <- struct{}{}: 221 default: 222 } 223 retErr = ErrLinkAlreadyConfigured 224 return 225 } 226 227 // Create the link entry. This will contain the connection 228 // in progress (if any), any error details and a context that 229 // lets the link be cancelled later. 230 state = &link{ 231 linkType: linkType, 232 linkProto: strings.ToUpper(u.Scheme), 233 kick: make(chan struct{}), 234 } 235 state.ctx, state.cancel = context.WithCancel(l.core.ctx) 236 237 // Store the state of the link so that it can be queried later. 238 l._links[info] = state 239 240 // Track how many consecutive connection failures we have had, 241 // as we will back off exponentially rather than hammering the 242 // remote node endlessly. 243 var backoff int 244 245 // backoffNow is called when there's a connection error. It 246 // will wait for the specified amount of time and then return 247 // true, unless the peering context was cancelled (due to a 248 // peer removal most likely), in which case it returns false. 249 // The caller should check the return value to decide whether 250 // or not to give up trying. 251 backoffNow := func() bool { 252 if backoff < 32 { 253 backoff++ 254 } 255 duration := time.Second << backoff 256 if duration > options.maxBackoff { 257 duration = options.maxBackoff 258 } 259 select { 260 case <-state.kick: 261 return true 262 case <-state.ctx.Done(): 263 return false 264 case <-l.core.ctx.Done(): 265 return false 266 case <-time.After(duration): 267 return true 268 } 269 } 270 271 // resetBackoff is called by the connection handler when the 272 // handshake has successfully completed. 273 resetBackoff := func() { 274 backoff = 0 275 } 276 277 // The goroutine is responsible for attempting the connection 278 // and then running the handler. If the connection is persistent 279 // then the loop will run endlessly, using backoffs as needed. 280 // Otherwise the loop will end, cleaning up the link entry. 281 go func() { 282 defer phony.Block(l, func() { 283 if l._links[info] == state { 284 delete(l._links, info) 285 } 286 }) 287 288 // This loop will run each and every time we want to attempt 289 // a connection to this peer. 290 // TODO get rid of this loop, this is *exactly* what time.AfterFunc is for, we should just send a signal to the links actor to kick off a goroutine as needed 291 for { 292 select { 293 case <-state.ctx.Done(): 294 // The peering context has been cancelled, so don't try 295 // to dial again. 296 return 297 default: 298 } 299 300 conn, err := l.connect(state.ctx, u, info, options) 301 if err != nil || conn == nil { 302 if err == nil && conn == nil { 303 l.core.log.Warnf("Link %q reached inconsistent error state", u.String()) 304 } 305 if linkType == linkTypePersistent { 306 // If the link is a persistent configured peering, 307 // store information about the connection error so 308 // that we can report it through the admin socket. 309 phony.Block(l, func() { 310 state._conn = nil 311 state._err = err 312 state._errtime = time.Now() 313 }) 314 315 // Back off for a bit. If true is returned here, we 316 // can continue onto the next loop iteration to try 317 // the next connection. 318 if backoffNow() { 319 continue 320 } 321 return 322 } 323 // Ephemeral and incoming connections don't remain 324 // after a connection failure, so exit out of the 325 // loop and clean up the link entry. 326 break 327 } 328 329 // The linkConn wrapper allows us to track the number of 330 // bytes written to and read from this connection without 331 // the help of ironwood. 332 lc := &linkConn{ 333 Conn: conn, 334 up: time.Now(), 335 } 336 337 // Update the link state with our newly wrapped connection. 338 // Clear the error state. 339 var doRet bool 340 phony.Block(l, func() { 341 if state._conn != nil { 342 // If a peering has come up in this time, abort this one. 343 doRet = true 344 } 345 state._conn = lc 346 }) 347 if doRet { 348 return 349 } 350 351 // Give the connection to the handler. The handler will block 352 // for the lifetime of the connection. 353 if err = l.handler(linkType, options, lc, resetBackoff); err != nil && err != io.EOF { 354 l.core.log.Debugf("Link %s error: %s\n", info.uri, err) 355 } 356 357 // The handler has stopped running so the connection is dead, 358 // try to close the underlying socket just in case and then 359 // update the link state. 360 _ = lc.Close() 361 phony.Block(l, func() { 362 state._conn = nil 363 if state._err = err; state._err != nil { 364 state._errtime = time.Now() 365 } 366 }) 367 368 // If the link is persistently configured, back off if needed 369 // and then try reconnecting. Otherwise, exit out. 370 if linkType == linkTypePersistent { 371 if backoffNow() { 372 continue 373 } 374 return 375 } 376 } 377 }() 378 }) 379 return retErr 380 } 381 382 func (l *links) remove(u *url.URL, sintf string, linkType linkType) error { 383 var retErr error 384 phony.Block(l, func() { 385 // Generate the link info and see whether we think we already 386 // have an open peering to this peer. 387 lu := urlForLinkInfo(*u) 388 info := linkInfo{ 389 uri: lu.String(), 390 sintf: sintf, 391 } 392 393 // If this peer is already configured then we will close the 394 // connection and stop it from retrying. 395 state, ok := l._links[info] 396 if ok && state != nil { 397 state.cancel() 398 if conn := state._conn; conn != nil { 399 retErr = conn.Close() 400 } 401 return 402 } 403 404 retErr = ErrLinkNotConfigured 405 }) 406 return retErr 407 } 408 409 func (l *links) listen(u *url.URL, sintf string) (*Listener, error) { 410 ctx, cancel := context.WithCancel(l.core.ctx) 411 var protocol linkProtocol 412 switch strings.ToLower(u.Scheme) { 413 case "tcp": 414 protocol = l.tcp 415 case "tls": 416 protocol = l.tls 417 case "unix": 418 protocol = l.unix 419 case "quic": 420 protocol = l.quic 421 default: 422 cancel() 423 return nil, ErrLinkUnrecognisedSchema 424 } 425 listener, err := protocol.listen(ctx, u, sintf) 426 if err != nil { 427 cancel() 428 return nil, err 429 } 430 li := &Listener{ 431 listener: listener, 432 ctx: ctx, 433 Cancel: cancel, 434 } 435 436 var options linkOptions 437 if p := u.Query().Get("priority"); p != "" { 438 pi, err := strconv.ParseUint(p, 10, 8) 439 if err != nil { 440 return nil, ErrLinkPriorityInvalid 441 } 442 options.priority = uint8(pi) 443 } 444 if p := u.Query().Get("password"); p != "" { 445 if len(p) > blake2b.Size { 446 return nil, ErrLinkPasswordInvalid 447 } 448 options.password = []byte(p) 449 } 450 451 go func() { 452 l.core.log.Infof("%s listener started on %s", strings.ToUpper(u.Scheme), listener.Addr()) 453 defer l.core.log.Infof("%s listener stopped on %s", strings.ToUpper(u.Scheme), listener.Addr()) 454 for { 455 conn, err := listener.Accept() 456 if err != nil { 457 return 458 } 459 go func(conn net.Conn) { 460 defer conn.Close() 461 462 // In order to populate a somewhat sane looking connection 463 // URI in the admin socket, we need to replace the host in 464 // the listener URL with the remote address. 465 pu := *u 466 pu.Host = conn.RemoteAddr().String() 467 lu := urlForLinkInfo(pu) 468 info := linkInfo{ 469 uri: lu.String(), 470 sintf: sintf, 471 } 472 473 // If there's an existing link state for this link, get it. 474 // If this node is already connected to us, just drop the 475 // connection. This prevents duplicate peerings. 476 var lc *linkConn 477 var state *link 478 phony.Block(l, func() { 479 var ok bool 480 state, ok = l._links[info] 481 if !ok || state == nil { 482 state = &link{ 483 linkType: linkTypeIncoming, 484 linkProto: strings.ToUpper(u.Scheme), 485 kick: make(chan struct{}), 486 } 487 } 488 if state._conn != nil { 489 // If a connection has come up in this time, abort 490 // this one. 491 return 492 } 493 494 // The linkConn wrapper allows us to track the number of 495 // bytes written to and read from this connection without 496 // the help of ironwood. 497 lc = &linkConn{ 498 Conn: conn, 499 up: time.Now(), 500 } 501 502 // Update the link state with our newly wrapped connection. 503 // Clear the error state. 504 state._conn = lc 505 state._err = nil 506 state._errtime = time.Time{} 507 508 // Store the state of the link so that it can be queried later. 509 l._links[info] = state 510 }) 511 if lc == nil { 512 return 513 } 514 515 // Give the connection to the handler. The handler will block 516 // for the lifetime of the connection. 517 if err = l.handler(linkTypeIncoming, options, lc, nil); err != nil && err != io.EOF { 518 l.core.log.Debugf("Link %s error: %s\n", u.Host, err) 519 } 520 521 // The handler has stopped running so the connection is dead, 522 // try to close the underlying socket just in case and then 523 // drop the link state. 524 _ = lc.Close() 525 phony.Block(l, func() { 526 if l._links[info] == state { 527 delete(l._links, info) 528 } 529 }) 530 }(conn) 531 } 532 }() 533 return li, nil 534 } 535 536 func (l *links) connect(ctx context.Context, u *url.URL, info linkInfo, options linkOptions) (net.Conn, error) { 537 var dialer linkProtocol 538 switch strings.ToLower(u.Scheme) { 539 case "tcp": 540 dialer = l.tcp 541 case "tls": 542 dialer = l.tls 543 case "socks", "sockstls": 544 dialer = l.socks 545 case "unix": 546 dialer = l.unix 547 case "quic": 548 dialer = l.quic 549 default: 550 return nil, ErrLinkUnrecognisedSchema 551 } 552 return dialer.dial(ctx, u, info, options) 553 } 554 555 func (l *links) handler(linkType linkType, options linkOptions, conn net.Conn, success func()) error { 556 meta := version_getBaseMetadata() 557 meta.publicKey = l.core.public 558 meta.priority = options.priority 559 metaBytes, err := meta.encode(l.core.secret, options.password) 560 if err != nil { 561 return fmt.Errorf("failed to generate handshake: %w", err) 562 } 563 if err := conn.SetDeadline(time.Now().Add(time.Second * 6)); err != nil { 564 return fmt.Errorf("failed to set handshake deadline: %w", err) 565 } 566 n, err := conn.Write(metaBytes) 567 switch { 568 case err != nil: 569 return fmt.Errorf("write handshake: %w", err) 570 case err == nil && n != len(metaBytes): 571 return fmt.Errorf("incomplete handshake send") 572 } 573 meta = version_metadata{} 574 base := version_getBaseMetadata() 575 if err := meta.decode(conn, options.password); err != nil { 576 _ = conn.Close() 577 return err 578 } 579 if !meta.check() { 580 return fmt.Errorf("remote node incompatible version (local %s, remote %s)", 581 fmt.Sprintf("%d.%d", base.majorVer, base.minorVer), 582 fmt.Sprintf("%d.%d", meta.majorVer, meta.minorVer), 583 ) 584 } 585 if err = conn.SetDeadline(time.Time{}); err != nil { 586 return fmt.Errorf("failed to clear handshake deadline: %w", err) 587 } 588 // Check if the remote side matches the keys we expected. This is a bit of a weak 589 // check - in future versions we really should check a signature or something like that. 590 if pinned := options.pinnedEd25519Keys; len(pinned) > 0 { 591 var key keyArray 592 copy(key[:], meta.publicKey) 593 if _, allowed := pinned[key]; !allowed { 594 return fmt.Errorf("node public key that does not match pinned keys") 595 } 596 } 597 // Check if we're authorized to connect to this key / IP 598 var allowed map[[32]byte]struct{} 599 phony.Block(l.core, func() { 600 allowed = l.core.config._allowedPublicKeys 601 }) 602 isallowed := len(allowed) == 0 603 for k := range allowed { 604 if bytes.Equal(k[:], meta.publicKey) { 605 isallowed = true 606 break 607 } 608 } 609 if linkType == linkTypeIncoming && !isallowed { 610 return fmt.Errorf("node public key %q is not in AllowedPublicKeys", hex.EncodeToString(meta.publicKey)) 611 } 612 613 dir := "outbound" 614 if linkType == linkTypeIncoming { 615 dir = "inbound" 616 } 617 remoteAddr := net.IP(address.AddrForKey(meta.publicKey)[:]).String() 618 remoteStr := fmt.Sprintf("%s@%s", remoteAddr, conn.RemoteAddr()) 619 localStr := conn.LocalAddr() 620 priority := options.priority 621 if meta.priority > priority { 622 priority = meta.priority 623 } 624 l.core.log.Infof("Connected %s: %s, source %s", 625 dir, remoteStr, localStr) 626 if success != nil { 627 success() 628 } 629 630 err = l.core.HandleConn(meta.publicKey, conn, priority) 631 switch err { 632 case io.EOF, net.ErrClosed, nil: 633 l.core.log.Infof("Disconnected %s: %s, source %s", 634 dir, remoteStr, localStr) 635 default: 636 l.core.log.Infof("Disconnected %s: %s, source %s; error: %s", 637 dir, remoteStr, localStr, err) 638 } 639 return nil 640 } 641 642 func urlForLinkInfo(u url.URL) url.URL { 643 u.RawQuery = "" 644 if host, _, err := net.SplitHostPort(u.Host); err == nil { 645 if addr, err := netip.ParseAddr(host); err == nil { 646 // For peers that look like multicast peers (i.e. 647 // link-local addresses), we will ignore the port number, 648 // otherwise we might open multiple connections to them. 649 if addr.IsLinkLocalUnicast() { 650 u.Host = fmt.Sprintf("[%s]", addr.String()) 651 } 652 } 653 } 654 return u 655 } 656 657 type linkConn struct { 658 // tx and rx are at the beginning of the struct to ensure 64-bit alignment 659 // on 32-bit platforms, see https://pkg.go.dev/sync/atomic#pkg-note-BUG 660 rx uint64 661 tx uint64 662 up time.Time 663 net.Conn 664 } 665 666 func (c *linkConn) Read(p []byte) (n int, err error) { 667 n, err = c.Conn.Read(p) 668 atomic.AddUint64(&c.rx, uint64(n)) 669 return 670 } 671 672 func (c *linkConn) Write(p []byte) (n int, err error) { 673 n, err = c.Conn.Write(p) 674 atomic.AddUint64(&c.tx, uint64(n)) 675 return 676 }