decred.org/dcrdex@v1.0.5/server/noderelay/noderelay.go (about) 1 // This code is available on the terms of the project LICENSE.md file, 2 // also available online at https://blueoakcouncil.org/license/1.0.0. 3 4 package noderelay 5 6 import ( 7 "context" 8 "crypto/elliptic" 9 "crypto/tls" 10 "encoding/json" 11 "errors" 12 "fmt" 13 "io" 14 "math/rand" 15 "net" 16 "net/http" 17 "os" 18 "path/filepath" 19 "regexp" 20 "sync" 21 "sync/atomic" 22 "time" 23 24 "decred.org/dcrdex/dex" 25 "decred.org/dcrdex/dex/ws" 26 "github.com/decred/dcrd/certgen" 27 ) 28 29 const ( 30 // PongWait is the time allowed to read the next pong message from the peer. 31 PongWait = 60 * time.Second 32 // PingPeriod sets the frequency on which websocket clients should ping. 33 PingPeriod = 30 * time.Second 34 // connectTimeoutSeconds is used to ensure timely connections. 35 connectTimeoutSeconds = 10 36 // expireTime is the time after sending a request to the source node before 37 // we consider the request failed. 38 expireTime = time.Second * 30 39 ) 40 41 // sourceNode represents a connected source node. 42 type sourceNode struct { 43 relayID string 44 addr string 45 cl *ws.WSLink 46 47 reqMtx sync.Mutex 48 respHandlers map[uint64]*responseHandler 49 } 50 51 // logReq stores the response handler in the respHandlers map. Requests to the 52 // client are associated with a response handler. 53 func (n *sourceNode) logReq(reqID uint64, respHandler func([]byte, map[string][]string), expire func()) { 54 n.reqMtx.Lock() 55 defer n.reqMtx.Unlock() 56 doExpire := func() { 57 // Delete the response handler, and call the provided expire function if 58 // *Nexus has not already retrieved the handler function for execution. 59 if n.expireRequest(reqID) { 60 expire() 61 } 62 } 63 n.respHandlers[reqID] = &responseHandler{ 64 f: respHandler, 65 expire: time.AfterFunc(expireTime, doExpire), 66 } 67 } 68 69 // expireRequest expires the pending request. 70 func (n *sourceNode) expireRequest(reqID uint64) bool { 71 n.reqMtx.Lock() 72 defer n.reqMtx.Unlock() 73 _, removed := n.respHandlers[reqID] 74 delete(n.respHandlers, reqID) 75 return removed 76 } 77 78 // respHandler gets the stored responseHandler, if it exists, else nil. 79 func (n *sourceNode) respHandler(reqID uint64) *responseHandler { 80 n.reqMtx.Lock() 81 defer n.reqMtx.Unlock() 82 cb, ok := n.respHandlers[reqID] 83 if ok { 84 // Stop the expiration Timer. If the Timer fired after respHandler was 85 // called, but we found the response handler in the map, wsLink.expire 86 // is waiting for the reqMtx lock and will return false, thus preventing 87 // the registered expire func from executing. 88 cb.expire.Stop() 89 delete(n.respHandlers, reqID) 90 } 91 return cb 92 } 93 94 // responseHandler is a handler for the response from a sent WebSockets request. 95 type responseHandler struct { 96 f func([]byte, map[string][]string) 97 expire *time.Timer 98 } 99 100 // nodeRelay manages source nodes. 101 type nodeRelay struct { 102 sync.RWMutex 103 sources map[string]*sourceNode 104 } 105 106 type NexusConfig struct { 107 // ExternalAddr is the external IP:port address or host(:port) of the 108 // Nexus relay manager. The operator must configure this independently 109 // so that the External address is routed to the specified Port listening 110 // on all loopback interfaces. 111 ExternalAddr string 112 // Port is the port that Nexus will listen on. 113 Port string 114 // Dir is a directory to output relayfiles and generated TLS key-cert pairs. 115 Dir string 116 // Key is the path to a TLS key. If Key == "", a new key and certificate 117 // will be created in the Dir. The ExternalAddr will be added to the 118 // certificate as a host. If the ExternalAddr changes, a new certificate 119 // can be generated by deleting the old key-cert pair and restarting. 120 // Changing the ExternalAddr renders any previously generated relayfiles 121 // void. 122 Key string 123 // Cert is the path to a TLS certificate. See docs for Key. 124 Cert string 125 Logger dex.Logger 126 // RelayIDs are the relay IDs for which to start node relays. These can be 127 // any string the caller chooses. These relay IDs are given to source node 128 // operators (generally as part of a relayfile) and are used to configure 129 // their source nodes. The channel returned from WaitForSourceNodes will 130 // not close until there is at least one source node connected for every 131 // ID in RelayIDs. 132 RelayIDs []string 133 } 134 135 // normalize checks sanity and sets defaults for the NexusConfig. 136 func (cfg *NexusConfig) normalize() error { 137 const ( 138 defaultNexusPort = "17537" 139 keyFilename = "relay.key" 140 certFilename = "relay.cert" 141 ) 142 if len(cfg.RelayIDs) == 0 { 143 return errors.New("no relays specified") 144 } 145 re := regexp.MustCompile(`\s`) 146 for _, relayID := range cfg.RelayIDs { 147 if re.MatchString(relayID) { 148 return fmt.Errorf("relay ID %q contains whitespace", relayID) 149 } 150 } 151 if cfg.Port == "" { 152 cfg.Port = defaultNexusPort 153 } 154 if cfg.Key == "" { 155 cfg.Key = filepath.Join(cfg.Dir, keyFilename) 156 } 157 if cfg.Cert == "" { 158 cfg.Cert = filepath.Join(cfg.Dir, certFilename) 159 } 160 return nil 161 } 162 163 // prepareKeys loads the TLS certificate, creating a key-cert pair if necessary. 164 func (cfg *NexusConfig) prepareKeys() (*tls.Config, []byte, error) { 165 keyExists := dex.FileExists(cfg.Key) 166 certExists := dex.FileExists(cfg.Cert) 167 if certExists == !keyExists { 168 return nil, nil, fmt.Errorf("missing cert pair file") 169 } 170 if !keyExists { 171 // certgen will actually ignore the port, but we'll remove it for good 172 // measure. 173 var dnsNames []string 174 if cfg.ExternalAddr != "" { 175 host, _, err := net.SplitHostPort(cfg.ExternalAddr) 176 if err != nil { 177 return nil, nil, fmt.Errorf("error parsing public address: %v", err) 178 } 179 dnsNames = []string{host} 180 } 181 err := genCertPair(cfg.Cert, cfg.Key, dnsNames, cfg.Logger) 182 if err != nil { 183 return nil, nil, err 184 } 185 } 186 keypair, err := tls.LoadX509KeyPair(cfg.Cert, cfg.Key) 187 if err != nil { 188 return nil, nil, err 189 } 190 191 certB, err := os.ReadFile(cfg.Cert) 192 if err != nil { 193 return nil, nil, fmt.Errorf("error loading certificate file contents: %v", err) 194 } 195 196 // Prepare the TLS configuration. 197 return &tls.Config{ 198 Certificates: []tls.Certificate{keypair}, 199 MinVersion: tls.VersionTLS12, 200 }, certB, nil 201 } 202 203 // Nexus is run on the server and manages a series of node relays. A source node 204 // will connect to the Nexus, making their services available for a local 205 // consumer. 206 type Nexus struct { 207 ctx context.Context 208 cfg *NexusConfig 209 tlsConfig *tls.Config 210 relayAddrs map[string]string 211 log dex.Logger 212 wg sync.WaitGroup 213 certB []byte 214 relayfileDir string 215 allNodesConnected chan struct{} 216 relays map[string]*nodeRelay 217 } 218 219 // NewNexus is the constructor for a Nexus. 220 func NewNexus(cfg *NexusConfig) (*Nexus, error) { 221 if err := cfg.normalize(); err != nil { 222 return nil, err 223 } 224 relayfileDir := filepath.Join(cfg.Dir, "relay-files") 225 if err := os.MkdirAll(relayfileDir, 0700); err != nil { 226 return nil, fmt.Errorf("error creating relay file directory: %w", err) 227 } 228 tlsConfig, certB, err := cfg.prepareKeys() 229 if err != nil { 230 return nil, err 231 } 232 233 relays := make(map[string]*nodeRelay, len(cfg.RelayIDs)) 234 for _, relayID := range cfg.RelayIDs { 235 relays[relayID] = &nodeRelay{ 236 sources: make(map[string]*sourceNode), 237 } 238 } 239 240 return &Nexus{ 241 cfg: cfg, 242 tlsConfig: tlsConfig, 243 relayAddrs: make(map[string]string), 244 log: cfg.Logger, 245 relays: relays, 246 certB: certB, 247 relayfileDir: relayfileDir, 248 allNodesConnected: make(chan struct{}), 249 }, nil 250 } 251 252 // RelayAddr returns the local address for relay, or an error if there is no 253 // server running for the given relay ID. 254 func (n *Nexus) RelayAddr(relayID string) (string, error) { 255 relayAddr, found := n.relayAddrs[relayID] 256 if !found { 257 return "", fmt.Errorf("no relay node found for ID %q", relayID) 258 } 259 return relayAddr, nil 260 } 261 262 // monitorNodeConnections checks the status of relays once per second, and 263 // closes the allNodesConnected channel when every relay has at least one 264 // source node. 265 func (n *Nexus) monitorNodeConnections() { 266 nodeReport := func() (registered, unregistered []string) { 267 for relayID, relay := range n.relays { 268 relay.RLock() 269 if len(relay.sources) > 0 { 270 registered = append(registered, relayID) 271 } else { 272 unregistered = append(unregistered, relayID) 273 } 274 relay.RUnlock() 275 } 276 return 277 } 278 279 n.log.Infof("Node relay waiting on %d source nodes to connect", len(n.relays)) 280 lastLog := time.Time{} 281 for { 282 if r, u := nodeReport(); len(u) == 0 { 283 close(n.allNodesConnected) 284 return 285 } else if time.Since(lastLog) > time.Minute { 286 lastLog = time.Now() 287 n.log.Infof("Node relay waiting on sources. %d / %d connected. Missing sources for relays %+v", len(r), len(r)+len(u), u) 288 } 289 select { 290 case <-time.After(time.Second): 291 case <-n.ctx.Done(): 292 return 293 } 294 } 295 } 296 297 // WaitForSourceNodes returns a channel that will be closed when a source node 298 // has connected for all relays. 299 func (n *Nexus) WaitForSourceNodes() <-chan struct{} { 300 return n.allNodesConnected 301 } 302 303 // RelayFile is used for encoding JSON relayfiles. A relayfile is a file that 304 // contains all the relevant connection information for a source node 305 // configuration. Nexus will generate a relayfile for each relay ID on startup. 306 type RelayFile struct { 307 RelayID string `json:"relayID"` 308 Cert dex.Bytes `json:"cert"` 309 Addr string `json:"addr"` 310 } 311 312 // Connect starts the Nexus, creating a relay node for every relay ID. 313 func (n *Nexus) Connect(ctx context.Context) (*sync.WaitGroup, error) { 314 n.ctx = ctx 315 316 log, wg := n.cfg.Logger, &n.wg 317 318 inAddr := "0.0.0.0:" + n.cfg.Port 319 320 // Create listener. 321 listener, err := tls.Listen("tcp", inAddr, n.tlsConfig) 322 if err != nil { 323 return nil, fmt.Errorf("can't listen on %s. nexus server quitting: %w", inAddr, err) 324 } 325 // Update the listening address in case a :0 was provided. 326 addr := listener.Addr().String() 327 328 for _, relayID := range n.cfg.RelayIDs { 329 relayAddr, err := n.runRelayServer(relayID) 330 if err != nil { 331 return nil, fmt.Errorf("error running node server for relay ID %s", relayID) 332 } 333 n.relayAddrs[relayID] = relayAddr 334 335 relayfilePath := filepath.Join(n.relayfileDir, relayID+".relayfile") 336 337 b, err := json.Marshal(&RelayFile{ 338 RelayID: relayID, 339 Cert: n.certB, 340 Addr: n.cfg.ExternalAddr, 341 }) 342 if err != nil { 343 n.log.Errorf("error encoding relay file: %v", err) 344 } else if err = os.WriteFile(relayfilePath, b, 0600); err != nil { 345 n.log.Errorf("error writing relay file: %v", err) 346 } 347 } 348 349 srv := &http.Server{ 350 Handler: http.HandlerFunc(n.handleSourceConnect), 351 ReadTimeout: connectTimeoutSeconds * time.Second, // slow requests should not hold connections opened 352 WriteTimeout: connectTimeoutSeconds * time.Second, // hung responses must die 353 } 354 355 // Close the listener on context cancellation. 356 wg.Add(1) 357 go func() { 358 defer wg.Done() 359 <-ctx.Done() 360 if err := srv.Shutdown(context.Background()); err != nil { 361 // Error from closing listeners: 362 log.Errorf("HTTP server Shutdown: %v", err) 363 } 364 }() 365 366 wg.Add(1) 367 go func() { 368 defer wg.Done() 369 if err := srv.Serve(listener); !errors.Is(err, http.ErrServerClosed) { 370 log.Warnf("unexpected (http.Server).Serve error: %v", err) 371 } 372 log.Infof("Server off") 373 }() 374 log.Infof("Noderelay server listening on %s", addr) 375 376 go n.monitorNodeConnections() 377 378 return &n.wg, nil 379 } 380 381 // handleSourceConnect handles a connection from a source node, upgrading the 382 // connection to a websocket connection and adding the sourceNode to the 383 // relayNode.sources. 384 func (n *Nexus) handleSourceConnect(w http.ResponseWriter, r *http.Request) { 385 wg, log, ctx := &n.wg, n.log, n.ctx 386 conn, err := ws.NewConnection(w, r, PongWait) 387 if err != nil { 388 log.Errorf("ws connection error: %v", err) 389 return 390 } 391 ip := dex.NewIPKey(r.RemoteAddr) 392 393 wg.Add(1) 394 go func() { 395 defer wg.Done() 396 397 cl := ws.NewWSLink(ip.String(), conn, PingPeriod, nil, log) 398 defer cl.Disconnect() 399 400 node := &sourceNode{ 401 cl: cl, 402 addr: r.RemoteAddr, 403 respHandlers: make(map[uint64]*responseHandler), 404 } 405 406 registered := make(chan error) 407 cl.RawHandler = func(b []byte) { 408 var resp RelayedMessage 409 if err := json.Unmarshal(b, &resp); err != nil { 410 n.log.Errorf("error unmarshalling connect message: %v", err) 411 return 412 } 413 414 if resp.MessageID == 0 { 415 node.relayID = string(resp.Body) 416 select { 417 case registered <- nil: 418 default: 419 log.Debugf("blocking node id channel") 420 } 421 return 422 } 423 if node.relayID == "" { 424 registered <- fmt.Errorf("received numbered request from %s before node ID", ip) 425 cl.Disconnect() 426 return 427 } 428 respHandler := node.respHandler(resp.MessageID) 429 if respHandler == nil { 430 n.log.Errorf("no handler for response from %s", ip) 431 return 432 } 433 respHandler.f(resp.Body, resp.Headers) 434 } 435 cm := dex.NewConnectionMaster(cl) 436 err := cm.ConnectOnce(ctx) // we discard the cm anyway, but good practice 437 if err != nil { 438 log.Errorf("websocketHandler client connect: %v", err) 439 return 440 } 441 442 const readLimit = 2_097_152 // 2 MiB 443 cl.SetReadLimit(readLimit) 444 445 select { 446 case err := <-registered: 447 if err != nil { 448 log.Error(err) 449 return 450 } 451 case <-time.After(connectTimeoutSeconds * time.Second): 452 log.Errorf("connected nexus source failed to ID") 453 return 454 case <-ctx.Done(): 455 return 456 } 457 458 if _, found := n.relayAddrs[node.relayID]; !found { 459 log.Warnf("source node trying to register with unknown relay ID %s", node.relayID) 460 return 461 } 462 463 relay, exists := n.relays[node.relayID] 464 if !exists { 465 log.Errorf("no relay with ID %s for source node connecting from %s", node.relayID, node.addr) 466 return 467 } 468 relay.Lock() 469 if oldNode, exists := relay.sources[node.addr]; exists { 470 oldNode.cl.Disconnect() 471 } 472 relay.sources[node.addr] = node 473 nodeCount := len(relay.sources) 474 relay.Unlock() 475 476 log.Infof("Source node for relay %q has connected from IP %s. %d sources now serving this relay", node.relayID, node.addr, nodeCount) 477 478 defer func() { 479 relay.Lock() 480 delete(relay.sources, node.addr) 481 nodeCount := len(relay.sources) 482 relay.Unlock() 483 log.Infof("Source node %s has disconnected from relay %s. %d sources now serving this relay", node.addr, node.relayID, nodeCount) 484 485 }() 486 487 cm.Wait() 488 }() 489 } 490 491 // RelayedMessage is the format with which HTTP requests are routed over the 492 // source nodes' WebSocket connections. 493 type RelayedMessage struct { 494 MessageID uint64 `json:"messageID"` 495 Method string `json:"method,omitempty"` 496 Body dex.Bytes `json:"body"` 497 Headers map[string][]string `json:"headers,omitempty"` 498 } 499 500 var messageIDCounter uint64 501 502 // runRelayServer runs a relayNode server. This server accepts requests from 503 // local consumers and routes them to a waiting source node connection. 504 func (n *Nexus) runRelayServer(relayID string) (string, error) { 505 log, wg := n.cfg.Logger, &n.wg 506 507 l, err := net.Listen("tcp", "127.0.0.1:0") 508 if err != nil { 509 return "", fmt.Errorf("error getting nexus listener for relay ID %s: %w", relayID, err) 510 } 511 512 relayAddr := l.Addr().String() 513 mgr := n.relays[relayID] 514 515 // This is a request coming from a local dcrdex backend. Send it to any 516 // waiting sourceNode and handle the response. 517 handleRequest := func(w http.ResponseWriter, r *http.Request) { 518 // Parse the request. 519 b, err := io.ReadAll(r.Body) 520 r.Body.Close() 521 if err != nil { 522 n.log.Errorf("Error reading request for relay ID %s: %v", relayID, err) 523 http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) 524 return 525 } 526 527 // Format for WebSockets. 528 reqID := atomic.AddUint64(&messageIDCounter, 1) 529 reqB, err := json.Marshal(&RelayedMessage{ 530 MessageID: reqID, 531 Method: r.Method, 532 Body: b, 533 Headers: r.Header, 534 }) 535 if err != nil { 536 log.Errorf("Error marshaling RelayedMessage: %v", err) 537 http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) 538 return 539 } 540 541 // Prepare a list of sources. 542 mgr.RLock() 543 nodeList := make([]*sourceNode, 0, len(mgr.sources)) 544 for _, n := range mgr.sources { 545 nodeList = append(nodeList, n) 546 } 547 mgr.RUnlock() 548 if len(nodeList) == 0 { 549 http.Error(w, fmt.Sprintf("No nodes connected for relay %s", relayID), http.StatusServiceUnavailable) 550 return 551 } 552 553 // Randomly shuffle the list. 554 rand.Shuffle(len(nodeList), func(i, j int) { 555 nodeList[i], nodeList[j] = nodeList[j], nodeList[i] 556 }) 557 558 // result is used to track the best result from the nodeList. The first 559 // non-error result is used to respond to the consumer. 560 type result struct { 561 body []byte 562 hdrs map[string][]string 563 err error 564 } 565 566 var res *result 567 568 out: 569 for i, node := range nodeList { 570 resultC := make(chan *result) 571 node.logReq(reqID, func(body []byte, hdrs map[string][]string) { 572 resultC <- &result{body: body, hdrs: hdrs} 573 }, func() { 574 resultC <- &result{err: fmt.Errorf("request expired")} 575 }) 576 577 node.cl.SendRaw(reqB) 578 select { 579 case res = <-resultC: 580 if res.err == nil { 581 break out 582 } 583 log.Errorf("Error requesting data from %s node at %s: %v", node.relayID, node.addr, res.err) 584 if i < len(nodeList)-1 { 585 log.Infof("Trying another source node") 586 } 587 case <-n.ctx.Done(): 588 return 589 } 590 } 591 if res == nil || res.err != nil { 592 http.Error(w, "all source nodes errored", http.StatusTeapot) 593 return 594 } 595 w.Header().Set("Content-Type", "application/json; charset=utf-8") 596 for k, vs := range res.hdrs { 597 for _, v := range vs { 598 w.Header().Set(k, v) 599 } 600 } 601 w.WriteHeader(http.StatusOK) 602 if _, err = w.Write(res.body); err != nil { 603 log.Errorf("Write error: %v", err) 604 } 605 } 606 607 // Start the server. 608 srv := &http.Server{ 609 Addr: relayAddr, 610 Handler: http.HandlerFunc(handleRequest), 611 ReadTimeout: connectTimeoutSeconds * time.Second, 612 WriteTimeout: connectTimeoutSeconds * time.Second, 613 } 614 615 wg.Add(1) 616 go func() { 617 defer wg.Done() 618 if err := srv.Serve(l); !errors.Is(err, http.ErrServerClosed) { 619 log.Errorf("listen: %s\n", err) 620 } 621 log.Infof("Nexus no longer serving relay %s: err = %v", relayID, err) 622 }() 623 624 wg.Add(1) 625 go func() { 626 defer wg.Done() 627 <-n.ctx.Done() 628 ctx, cancel := context.WithTimeout(context.Background(), time.Second*2) 629 defer cancel() 630 if err := srv.Shutdown(ctx); err != nil { 631 log.Errorf("http.Server Shutdown errored: %v", err) 632 } 633 }() 634 635 return relayAddr, nil 636 } 637 638 // genCertPair generates a key/cert pair to the paths provided. 639 func genCertPair(certFile, keyFile string, hosts []string, log dex.Logger) error { 640 log.Infof("Generating TLS certificates...") 641 642 org := "dcrdex nexus autogenerated cert" 643 validUntil := time.Now().Add(10 * 365 * 24 * time.Hour) 644 cert, key, err := certgen.NewTLSCertPair(elliptic.P521(), org, 645 validUntil, hosts) 646 if err != nil { 647 return err 648 } 649 650 // Write cert and key files. 651 if err = os.WriteFile(certFile, cert, 0644); err != nil { 652 return err 653 } 654 if err = os.WriteFile(keyFile, key, 0600); err != nil { 655 os.Remove(certFile) 656 return err 657 } 658 659 log.Infof("Done generating TLS certificates") 660 return nil 661 }