github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/server.go (about) 1 // Copyright 2012-2024 The NATS Authors 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package server 15 16 import ( 17 "bytes" 18 "context" 19 "crypto/tls" 20 "encoding/json" 21 "errors" 22 "flag" 23 "fmt" 24 "hash/fnv" 25 "io" 26 "log" 27 "math/rand" 28 "net" 29 "net/http" 30 "regexp" 31 "runtime/pprof" 32 33 // Allow dynamic profiling. 34 _ "net/http/pprof" 35 "os" 36 "path" 37 "path/filepath" 38 "runtime" 39 "strconv" 40 "strings" 41 "sync" 42 "sync/atomic" 43 "time" 44 45 "github.com/klauspost/compress/s2" 46 "github.com/nats-io/jwt/v2" 47 "github.com/nats-io/nkeys" 48 "github.com/nats-io/nuid" 49 50 "github.com/nats-io/nats-server/v2/logger" 51 ) 52 53 const ( 54 // Interval for the first PING for non client connections. 55 firstPingInterval = time.Second 56 57 // This is for the first ping for client connections. 58 firstClientPingInterval = 2 * time.Second 59 ) 60 61 // These are protocol versions sent between server connections: ROUTER, LEAF and 62 // GATEWAY. We may have protocol versions that have a meaning only for a certain 63 // type of connections, but we don't have to have separate enums for that. 64 // However, it is CRITICAL to not change the order of those constants since they 65 // are exchanged between servers. When adding a new protocol version, add to the 66 // end of the list, don't try to group them by connection types. 
67 const ( 68 // RouteProtoZero is the original Route protocol from 2009. 69 // http://nats.io/documentation/internals/nats-protocol/ 70 RouteProtoZero = iota 71 // RouteProtoInfo signals a route can receive more then the original INFO block. 72 // This can be used to update remote cluster permissions, etc... 73 RouteProtoInfo 74 // RouteProtoV2 is the new route/cluster protocol that provides account support. 75 RouteProtoV2 76 // MsgTraceProto indicates that this server understands distributed message tracing. 77 MsgTraceProto 78 ) 79 80 // Will return the latest server-to-server protocol versions, unless the 81 // option to override it is set. 82 func (s *Server) getServerProto() int { 83 opts := s.getOpts() 84 // Initialize with the latest protocol version. 85 proto := MsgTraceProto 86 // For tests, we want to be able to make this server behave 87 // as an older server so check this option to see if we should override. 88 if opts.overrideProto < 0 { 89 // The option overrideProto is set to 0 by default (when creating an 90 // Options structure). Since this is the same value than the original 91 // proto RouteProtoZero, tests call setServerProtoForTest() with the 92 // desired protocol level, which sets it as negative value equal to: 93 // (wantedProto + 1) * -1. Here we compute back the real value. 94 proto = (opts.overrideProto * -1) - 1 95 } 96 return proto 97 } 98 99 // Used by tests. 100 func setServerProtoForTest(wantedProto int) int { 101 return (wantedProto + 1) * -1 102 } 103 104 // Info is the information sent to clients, routes, gateways, and leaf nodes, 105 // to help them understand information about this server. 
type Info struct {
	ID                string   `json:"server_id"`             // Server public nkey (NewServer sets it from the server key pair).
	Name              string   `json:"server_name"`           // Configured server name; NewServer falls back to the public nkey when none is set.
	Version           string   `json:"version"`               // NewServer sets this to the VERSION constant.
	Proto             int      `json:"proto"`                 // NewServer sets this to the PROTO constant.
	GitCommit         string   `json:"git_commit,omitempty"`
	GoVersion         string   `json:"go"`                    // NewServer sets this to runtime.Version().
	Host              string   `json:"host"`
	Port              int      `json:"port"`
	Headers           bool     `json:"headers"`                 // NewServer sets this to !opts.NoHeaderSupport.
	AuthRequired      bool     `json:"auth_required,omitempty"`
	TLSRequired       bool     `json:"tls_required,omitempty"`  // TLS is configured and AllowNonTLS is not set.
	TLSVerify         bool     `json:"tls_verify,omitempty"`    // TLS config requires and verifies client certificates.
	TLSAvailable      bool     `json:"tls_available,omitempty"` // TLS is configured but not required.
	MaxPayload        int32    `json:"max_payload"`
	JetStream         bool     `json:"jetstream,omitempty"`
	IP                string   `json:"ip,omitempty"`
	CID               uint64   `json:"client_id,omitempty"`
	ClientIP          string   `json:"client_ip,omitempty"`
	Nonce             string   `json:"nonce,omitempty"`
	Cluster           string   `json:"cluster,omitempty"` // Cluster name; NewServer generates one when a cluster port is set without a name.
	Dynamic           bool     `json:"cluster_dynamic,omitempty"`
	Domain            string   `json:"domain,omitempty"`          // NewServer sets this from opts.JetStreamDomain.
	ClientConnectURLs []string `json:"connect_urls,omitempty"`    // Contains URLs a client can connect to.
	WSConnectURLs     []string `json:"ws_connect_urls,omitempty"` // Contains URLs a ws client can connect to.
	LameDuckMode      bool     `json:"ldm,omitempty"`
	Compression       string   `json:"compression,omitempty"`

	// Route Specific
	Import        *SubjectPermission `json:"import,omitempty"`
	Export        *SubjectPermission `json:"export,omitempty"`
	LNOC          bool               `json:"lnoc,omitempty"`
	InfoOnConnect bool               `json:"info_on_connect,omitempty"` // When true the server will respond to CONNECT with an INFO
	ConnectInfo   bool               `json:"connect_info,omitempty"`    // When true this is the server INFO response to CONNECT
	RoutePoolSize int                `json:"route_pool_size,omitempty"`
	RoutePoolIdx  int                `json:"route_pool_idx,omitempty"`
	RouteAccount  string             `json:"route_account,omitempty"`
	RouteAccReqID string             `json:"route_acc_add_reqid,omitempty"`

	// Gateways Specific
	Gateway           string   `json:"gateway,omitempty"`             // Name of the origin Gateway (sent by gateway's INFO)
	GatewayURLs       []string `json:"gateway_urls,omitempty"`        // Gateway URLs in the originating cluster (sent by gateway's INFO)
	GatewayURL        string   `json:"gateway_url,omitempty"`         // Gateway URL on that server (sent by route's INFO)
	GatewayCmd        byte     `json:"gateway_cmd,omitempty"`         // Command code for the receiving server to know what to do
	GatewayCmdPayload []byte   `json:"gateway_cmd_payload,omitempty"` // Command payload when needed
	GatewayNRP        bool     `json:"gateway_nrp,omitempty"`         // Uses new $GNR. prefix for mapped replies
	GatewayIOM        bool     `json:"gateway_iom,omitempty"`         // Indicate that all accounts will be switched to InterestOnly mode "right away"

	// LeafNode Specific
	LeafNodeURLs  []string `json:"leafnode_urls,omitempty"`  // LeafNode URLs that the server can reconnect to.
	RemoteAccount string   `json:"remote_account,omitempty"` // Lets the other side know the remote account that they bind to.

	XKey string `json:"xkey,omitempty"` // Public server's x25519 key.
}

// Server is our main struct.
162 type Server struct { 163 // Fields accessed with atomic operations need to be 64-bit aligned 164 gcid uint64 165 // How often user logon fails due to the issuer account not being pinned. 166 pinnedAccFail uint64 167 stats 168 scStats 169 mu sync.RWMutex 170 reloadMu sync.RWMutex // Write-locked when a config reload is taking place ONLY 171 kp nkeys.KeyPair 172 xkp nkeys.KeyPair 173 xpub string 174 info Info 175 configFile string 176 optsMu sync.RWMutex 177 opts *Options 178 running atomic.Bool 179 shutdown atomic.Bool 180 listener net.Listener 181 listenerErr error 182 gacc *Account 183 sys *internal 184 js atomic.Pointer[jetStream] 185 isMetaLeader atomic.Bool 186 accounts sync.Map 187 tmpAccounts sync.Map // Temporarily stores accounts that are being built 188 activeAccounts int32 189 accResolver AccountResolver 190 clients map[uint64]*client 191 routes map[string][]*client 192 routesPoolSize int // Configured pool size 193 routesReject bool // During reload, we may want to reject adding routes until some conditions are met 194 routesNoPool int // Number of routes that don't use pooling (connecting to older server for instance) 195 accRoutes map[string]map[string]*client // Key is account name, value is key=remoteID/value=route connection 196 accRouteByHash sync.Map // Key is account name, value is nil or a pool index 197 accAddedCh chan struct{} 198 accAddedReqID string 199 leafs map[uint64]*client 200 users map[string]*User 201 nkeys map[string]*NkeyUser 202 totalClients uint64 203 closed *closedRingBuffer 204 done chan bool 205 start time.Time 206 http net.Listener 207 httpHandler http.Handler 208 httpBasePath string 209 profiler net.Listener 210 httpReqStats map[string]uint64 211 routeListener net.Listener 212 routeListenerErr error 213 routeInfo Info 214 routeResolver netResolver 215 routesToSelf map[string]struct{} 216 routeTLSName string 217 leafNodeListener net.Listener 218 leafNodeListenerErr error 219 leafNodeInfo Info 220 leafNodeInfoJSON []byte 
221 leafURLsMap refCountedUrlSet 222 leafNodeOpts struct { 223 resolver netResolver 224 dialTimeout time.Duration 225 } 226 leafRemoteCfgs []*leafNodeCfg 227 leafRemoteAccounts sync.Map 228 leafNodeEnabled bool 229 leafDisableConnect bool // Used in test only 230 231 quitCh chan struct{} 232 startupComplete chan struct{} 233 shutdownComplete chan struct{} 234 235 // Tracking Go routines 236 grMu sync.Mutex 237 grTmpClients map[uint64]*client 238 grRunning bool 239 grWG sync.WaitGroup // to wait on various go routines 240 241 cproto int64 // number of clients supporting async INFO 242 configTime time.Time // last time config was loaded 243 244 logging struct { 245 sync.RWMutex 246 logger Logger 247 trace int32 248 debug int32 249 traceSysAcc int32 250 } 251 252 clientConnectURLs []string 253 254 // Used internally for quick look-ups. 255 clientConnectURLsMap refCountedUrlSet 256 257 lastCURLsUpdate int64 258 259 // For Gateways 260 gatewayListener net.Listener // Accept listener 261 gatewayListenerErr error 262 gateway *srvGateway 263 264 // Used by tests to check that http.Servers do 265 // not set any timeout. 266 monitoringServer *http.Server 267 profilingServer *http.Server 268 269 // LameDuck mode 270 ldm bool 271 ldmCh chan bool 272 273 // Trusted public operator keys. 274 trustedKeys []string 275 // map of trusted keys to operator setting StrictSigningKeyUsage 276 strictSigningKeyUsage map[string]struct{} 277 278 // We use this to minimize mem copies for requests to monitoring 279 // endpoint /varz (when it comes from http). 280 varzMu sync.Mutex 281 varz *Varz 282 // This is set during a config reload if we detect that we have 283 // added/removed routes. The monitoring code then check that 284 // to know if it should update the cluster's URLs array. 285 varzUpdateRouteURLs bool 286 287 // Keeps a sublist of of subscriptions attached to leafnode connections 288 // for the $GNR.*.*.*.> subject so that a server can send back a mapped 289 // gateway reply. 
290 gwLeafSubs *Sublist 291 292 // Used for expiration of mapped GW replies 293 gwrm struct { 294 w int32 295 ch chan time.Duration 296 m sync.Map 297 } 298 299 // For eventIDs 300 eventIds *nuid.NUID 301 302 // Websocket structure 303 websocket srvWebsocket 304 305 // MQTT structure 306 mqtt srvMQTT 307 308 // OCSP monitoring 309 ocsps []*OCSPMonitor 310 311 // OCSP peer verification (at least one TLS block) 312 ocspPeerVerify bool 313 314 // OCSP response cache 315 ocsprc OCSPResponseCache 316 317 // exporting account name the importer experienced issues with 318 incompleteAccExporterMap sync.Map 319 320 // Holds cluster name under different lock for mapping 321 cnMu sync.RWMutex 322 cn string 323 324 // For registering raft nodes with the server. 325 rnMu sync.RWMutex 326 raftNodes map[string]RaftNode 327 328 // For mapping from a raft node name back to a server name and cluster. Node has to be in the same domain. 329 nodeToInfo sync.Map 330 331 // For out of resources to not log errors too fast. 332 rerrMu sync.Mutex 333 rerrLast time.Time 334 335 connRateCounter *rateCounter 336 337 // If there is a system account configured, to still support the $G account, 338 // the server will create a fake user and add it to the list of users. 339 // Keep track of what that user name is for config reload purposes. 340 sysAccOnlyNoAuthUser string 341 342 // IPQueues map 343 ipQueues sync.Map 344 345 // To limit logging frequency 346 rateLimitLogging sync.Map 347 rateLimitLoggingCh chan time.Duration 348 349 // Total outstanding catchup bytes in flight. 350 gcbMu sync.RWMutex 351 gcbOut int64 352 gcbOutMax int64 // Taken from JetStreamMaxCatchup or defaultMaxTotalCatchupOutBytes 353 // A global chanel to kick out stalled catchup sequences. 
354 gcbKick chan struct{} 355 356 // Total outbound syncRequests 357 syncOutSem chan struct{} 358 359 // Queue to process JS API requests that come from routes (or gateways) 360 jsAPIRoutedReqs *ipQueue[*jsAPIRoutedReq] 361 } 362 363 // For tracking JS nodes. 364 type nodeInfo struct { 365 name string 366 version string 367 cluster string 368 domain string 369 id string 370 tags jwt.TagList 371 cfg *JetStreamConfig 372 stats *JetStreamStats 373 offline bool 374 js bool 375 binarySnapshots bool 376 } 377 378 // Make sure all are 64bits for atomic use 379 type stats struct { 380 inMsgs int64 381 outMsgs int64 382 inBytes int64 383 outBytes int64 384 slowConsumers int64 385 } 386 387 // scStats includes the total and per connection counters of Slow Consumers. 388 type scStats struct { 389 clients atomic.Uint64 390 routes atomic.Uint64 391 leafs atomic.Uint64 392 gateways atomic.Uint64 393 } 394 395 // This is used by tests so we can run all server tests with a default route 396 // or leafnode compression mode. For instance: 397 // go test -race -v ./server -cluster_compression=fast 398 var ( 399 testDefaultClusterCompression string 400 testDefaultLeafNodeCompression string 401 ) 402 403 // Compression modes. 404 const ( 405 CompressionNotSupported = "not supported" 406 CompressionOff = "off" 407 CompressionAccept = "accept" 408 CompressionS2Auto = "s2_auto" 409 CompressionS2Uncompressed = "s2_uncompressed" 410 CompressionS2Fast = "s2_fast" 411 CompressionS2Better = "s2_better" 412 CompressionS2Best = "s2_best" 413 ) 414 415 // defaultCompressionS2AutoRTTThresholds is the default of RTT thresholds for 416 // the CompressionS2Auto mode. 417 var defaultCompressionS2AutoRTTThresholds = []time.Duration{ 418 // [0..10ms] -> CompressionS2Uncompressed 419 10 * time.Millisecond, 420 // ]10ms..50ms] -> CompressionS2Fast 421 50 * time.Millisecond, 422 // ]50ms..100ms] -> CompressionS2Better 423 100 * time.Millisecond, 424 // ]100ms..] 
-> CompressionS2Best 425 } 426 427 // For a given user provided string, matches to one of the compression mode 428 // constant and updates the provided string to that constant. Returns an 429 // error if the provided compression mode is not known. 430 // The parameter `chosenModeForOn` indicates which compression mode to use 431 // when the user selects "on" (or enabled, true, etc..). This is because 432 // we may have different defaults depending on where the compression is used. 433 func validateAndNormalizeCompressionOption(c *CompressionOpts, chosenModeForOn string) error { 434 if c == nil { 435 return nil 436 } 437 cmtl := strings.ToLower(c.Mode) 438 // First, check for the "on" case so that we set to the default compression 439 // mode for that. The other switch/case will finish setup if needed (for 440 // instance if the default mode is s2Auto). 441 switch cmtl { 442 case "on", "enabled", "true": 443 cmtl = chosenModeForOn 444 default: 445 } 446 // Check (again) with the proper mode. 447 switch cmtl { 448 case "not supported", "not_supported": 449 c.Mode = CompressionNotSupported 450 case "disabled", "off", "false": 451 c.Mode = CompressionOff 452 case "accept": 453 c.Mode = CompressionAccept 454 case "auto", "s2_auto": 455 var rtts []time.Duration 456 if len(c.RTTThresholds) == 0 { 457 rtts = defaultCompressionS2AutoRTTThresholds 458 } else { 459 for _, n := range c.RTTThresholds { 460 // Do not error on negative, but simply set to 0 461 if n < 0 { 462 n = 0 463 } 464 // Make sure they are properly ordered. However, it is possible 465 // to have a "0" anywhere in the list to indicate that this 466 // compression level should not be used. 467 if l := len(rtts); l > 0 && n != 0 { 468 for _, v := range rtts { 469 if n < v { 470 return fmt.Errorf("RTT threshold values %v should be in ascending order", c.RTTThresholds) 471 } 472 } 473 } 474 rtts = append(rtts, n) 475 } 476 if len(rtts) > 0 { 477 // Trim 0 that are at the end. 
478 stop := -1 479 for i := len(rtts) - 1; i >= 0; i-- { 480 if rtts[i] != 0 { 481 stop = i 482 break 483 } 484 } 485 rtts = rtts[:stop+1] 486 } 487 if len(rtts) > 4 { 488 // There should be at most values for "uncompressed", "fast", 489 // "better" and "best" (when some 0 are present). 490 return fmt.Errorf("compression mode %q should have no more than 4 RTT thresholds: %v", c.Mode, c.RTTThresholds) 491 } else if len(rtts) == 0 { 492 // But there should be at least 1 if the user provided the slice. 493 // We would be here only if it was provided by say with values 494 // being a single or all zeros. 495 return fmt.Errorf("compression mode %q requires at least one RTT threshold", c.Mode) 496 } 497 } 498 c.Mode = CompressionS2Auto 499 c.RTTThresholds = rtts 500 case "fast", "s2_fast": 501 c.Mode = CompressionS2Fast 502 case "better", "s2_better": 503 c.Mode = CompressionS2Better 504 case "best", "s2_best": 505 c.Mode = CompressionS2Best 506 default: 507 return fmt.Errorf("unsupported compression mode %q", c.Mode) 508 } 509 return nil 510 } 511 512 // Returns `true` if the compression mode `m` indicates that the server 513 // will negotiate compression with the remote server, `false` otherwise. 514 // Note that the provided compression mode is assumed to have been 515 // normalized and validated. 516 func needsCompression(m string) bool { 517 return m != _EMPTY_ && m != CompressionOff && m != CompressionNotSupported 518 } 519 520 // Compression is asymmetric, meaning that one side can have a different 521 // compression level than the other. However, we need to check for cases 522 // when this server `scm` or the remote `rcm` do not support compression 523 // (say older server, or test to make it behave as it is not), or have 524 // the compression off. 525 // Note that `scm` is assumed to not be "off" or "not supported". 
526 func selectCompressionMode(scm, rcm string) (mode string, err error) { 527 if rcm == CompressionNotSupported || rcm == _EMPTY_ { 528 return CompressionNotSupported, nil 529 } 530 switch rcm { 531 case CompressionOff: 532 // If the remote explicitly disables compression, then we won't 533 // use compression. 534 return CompressionOff, nil 535 case CompressionAccept: 536 // If the remote is ok with compression (but is not initiating it), 537 // and if we too are in this mode, then it means no compression. 538 if scm == CompressionAccept { 539 return CompressionOff, nil 540 } 541 // Otherwise use our compression mode. 542 return scm, nil 543 case CompressionS2Auto, CompressionS2Uncompressed, CompressionS2Fast, CompressionS2Better, CompressionS2Best: 544 // This case is here to make sure that if we don't recognize a 545 // compression setting, we error out. 546 if scm == CompressionAccept { 547 // If our compression mode is "accept", then we will use the remote 548 // compression mode, except if it is "auto", in which case we will 549 // default to "fast". This is not a configuration (auto in one 550 // side and accept in the other) that would be recommended. 551 if rcm == CompressionS2Auto { 552 return CompressionS2Fast, nil 553 } 554 // Use their compression mode. 555 return rcm, nil 556 } 557 // Otherwise use our compression mode. 558 return scm, nil 559 default: 560 return _EMPTY_, fmt.Errorf("unsupported route compression mode %q", rcm) 561 } 562 } 563 564 // If the configured compression mode is "auto" then will return that, 565 // otherwise will return the given `cm` compression mode. 566 func compressionModeForInfoProtocol(co *CompressionOpts, cm string) string { 567 if co.Mode == CompressionS2Auto { 568 return CompressionS2Auto 569 } 570 return cm 571 } 572 573 // Given a connection RTT and a list of thresholds durations, this 574 // function will return an S2 compression level such as "uncompressed", 575 // "fast", "better" or "best". 
For instance, with the following slice: 576 // [5ms, 10ms, 15ms, 20ms], a RTT of up to 5ms will result 577 // in the compression level "uncompressed", ]5ms..10ms] will result in 578 // "fast" compression, etc.. 579 // However, the 0 value allows for disabling of some compression levels. 580 // For instance, the following slice: [0, 0, 20, 30] means that a RTT of 581 // [0..20ms] would result in the "better" compression - effectively disabling 582 // the use of "uncompressed" and "fast", then anything above 20ms would 583 // result in the use of "best" level (the 30 in the list has no effect 584 // and the list could have been simplified to [0, 0, 20]). 585 func selectS2AutoModeBasedOnRTT(rtt time.Duration, rttThresholds []time.Duration) string { 586 var idx int 587 var found bool 588 for i, d := range rttThresholds { 589 if rtt <= d { 590 idx = i 591 found = true 592 break 593 } 594 } 595 if !found { 596 // If we did not find but we have all levels, then use "best", 597 // otherwise use the last one in array. 598 if l := len(rttThresholds); l >= 3 { 599 idx = 3 600 } else { 601 idx = l - 1 602 } 603 } 604 switch idx { 605 case 0: 606 return CompressionS2Uncompressed 607 case 1: 608 return CompressionS2Fast 609 case 2: 610 return CompressionS2Better 611 } 612 return CompressionS2Best 613 } 614 615 // Returns an array of s2 WriterOption based on the route compression mode. 616 // So far we return a single option, but this way we can call s2.NewWriter() 617 // with a nil []s2.WriterOption, but not with a nil s2.WriterOption, so 618 // this is more versatile. 
619 func s2WriterOptions(cm string) []s2.WriterOption { 620 _opts := [2]s2.WriterOption{} 621 opts := append( 622 _opts[:0], 623 s2.WriterConcurrency(1), // Stop asynchronous flushing in separate goroutines 624 ) 625 switch cm { 626 case CompressionS2Uncompressed: 627 return append(opts, s2.WriterUncompressed()) 628 case CompressionS2Best: 629 return append(opts, s2.WriterBestCompression()) 630 case CompressionS2Better: 631 return append(opts, s2.WriterBetterCompression()) 632 default: 633 return nil 634 } 635 } 636 637 // New will setup a new server struct after parsing the options. 638 // DEPRECATED: Use NewServer(opts) 639 func New(opts *Options) *Server { 640 s, _ := NewServer(opts) 641 return s 642 } 643 644 // NewServer will setup a new server struct after parsing the options. 645 // Could return an error if options can not be validated. 646 func NewServer(opts *Options) (*Server, error) { 647 setBaselineOptions(opts) 648 649 // Process TLS options, including whether we require client certificates. 650 tlsReq := opts.TLSConfig != nil 651 verify := (tlsReq && opts.TLSConfig.ClientAuth == tls.RequireAndVerifyClientCert) 652 653 // Create our server's nkey identity. 654 kp, _ := nkeys.CreateServer() 655 pub, _ := kp.PublicKey() 656 657 // Create an xkey for encrypting messages from this server. 658 xkp, _ := nkeys.CreateCurveKeys() 659 xpub, _ := xkp.PublicKey() 660 661 serverName := pub 662 if opts.ServerName != _EMPTY_ { 663 serverName = opts.ServerName 664 } 665 666 httpBasePath := normalizeBasePath(opts.HTTPBasePath) 667 668 // Validate some options. This is here because we cannot assume that 669 // server will always be started with configuration parsing (that could 670 // report issues). Its options can be (incorrectly) set by hand when 671 // server is embedded. If there is an error, return nil. 
672 if err := validateOptions(opts); err != nil { 673 return nil, err 674 } 675 676 info := Info{ 677 ID: pub, 678 XKey: xpub, 679 Version: VERSION, 680 Proto: PROTO, 681 GitCommit: gitCommit, 682 GoVersion: runtime.Version(), 683 Name: serverName, 684 Host: opts.Host, 685 Port: opts.Port, 686 AuthRequired: false, 687 TLSRequired: tlsReq && !opts.AllowNonTLS, 688 TLSVerify: verify, 689 MaxPayload: opts.MaxPayload, 690 JetStream: opts.JetStream, 691 Headers: !opts.NoHeaderSupport, 692 Cluster: opts.Cluster.Name, 693 Domain: opts.JetStreamDomain, 694 } 695 696 if tlsReq && !info.TLSRequired { 697 info.TLSAvailable = true 698 } 699 700 now := time.Now() 701 702 s := &Server{ 703 kp: kp, 704 xkp: xkp, 705 xpub: xpub, 706 configFile: opts.ConfigFile, 707 info: info, 708 opts: opts, 709 done: make(chan bool, 1), 710 start: now, 711 configTime: now, 712 gwLeafSubs: NewSublistWithCache(), 713 httpBasePath: httpBasePath, 714 eventIds: nuid.New(), 715 routesToSelf: make(map[string]struct{}), 716 httpReqStats: make(map[string]uint64), // Used to track HTTP requests 717 rateLimitLoggingCh: make(chan time.Duration, 1), 718 leafNodeEnabled: opts.LeafNode.Port != 0 || len(opts.LeafNode.Remotes) > 0, 719 syncOutSem: make(chan struct{}, maxConcurrentSyncRequests), 720 } 721 722 // Fill up the maximum in flight syncRequests for this server. 723 // Used in JetStream catchup semantics. 724 for i := 0; i < maxConcurrentSyncRequests; i++ { 725 s.syncOutSem <- struct{}{} 726 } 727 728 if opts.TLSRateLimit > 0 { 729 s.connRateCounter = newRateCounter(opts.tlsConfigOpts.RateLimit) 730 } 731 732 // Trusted root operator keys. 733 if !s.processTrustedKeys() { 734 return nil, fmt.Errorf("Error processing trusted operator keys") 735 } 736 737 // If we have solicited leafnodes but no clustering and no clustername. 738 // However we may need a stable clustername so use the server name. 
739 if len(opts.LeafNode.Remotes) > 0 && opts.Cluster.Port == 0 && opts.Cluster.Name == _EMPTY_ { 740 opts.Cluster.Name = opts.ServerName 741 } 742 743 if opts.Cluster.Name != _EMPTY_ { 744 // Also place into mapping cn with cnMu lock. 745 s.cnMu.Lock() 746 s.cn = opts.Cluster.Name 747 s.cnMu.Unlock() 748 } 749 750 s.mu.Lock() 751 defer s.mu.Unlock() 752 753 // Place ourselves in the JetStream nodeInfo if needed. 754 if opts.JetStream { 755 ourNode := getHash(serverName) 756 s.nodeToInfo.Store(ourNode, nodeInfo{ 757 serverName, 758 VERSION, 759 opts.Cluster.Name, 760 opts.JetStreamDomain, 761 info.ID, 762 opts.Tags, 763 &JetStreamConfig{MaxMemory: opts.JetStreamMaxMemory, MaxStore: opts.JetStreamMaxStore, CompressOK: true}, 764 nil, 765 false, true, true, 766 }) 767 } 768 769 s.routeResolver = opts.Cluster.resolver 770 if s.routeResolver == nil { 771 s.routeResolver = net.DefaultResolver 772 } 773 774 // Used internally for quick look-ups. 775 s.clientConnectURLsMap = make(refCountedUrlSet) 776 s.websocket.connectURLsMap = make(refCountedUrlSet) 777 s.leafURLsMap = make(refCountedUrlSet) 778 779 // Ensure that non-exported options (used in tests) are properly set. 780 s.setLeafNodeNonExportedOptions() 781 782 // Setup OCSP Stapling and OCSP Peer. This will abort server from starting if there 783 // are no valid staples and OCSP Stapling policy is set to Always or MustStaple. 784 if err := s.enableOCSP(); err != nil { 785 return nil, err 786 } 787 788 // Call this even if there is no gateway defined. It will 789 // initialize the structure so we don't have to check for 790 // it to be nil or not in various places in the code. 791 if err := s.newGateway(opts); err != nil { 792 return nil, err 793 } 794 795 // If we have a cluster definition but do not have a cluster name, create one. 
796 if opts.Cluster.Port != 0 && opts.Cluster.Name == _EMPTY_ { 797 s.info.Cluster = nuid.Next() 798 } else if opts.Cluster.Name != _EMPTY_ { 799 // Likewise here if we have a cluster name set. 800 s.info.Cluster = opts.Cluster.Name 801 } 802 803 // This is normally done in the AcceptLoop, once the 804 // listener has been created (possibly with random port), 805 // but since some tests may expect the INFO to be properly 806 // set after New(), let's do it now. 807 s.setInfoHostPort() 808 809 // For tracking clients 810 s.clients = make(map[uint64]*client) 811 812 // For tracking closed clients. 813 s.closed = newClosedRingBuffer(opts.MaxClosedClients) 814 815 // For tracking connections that are not yet registered 816 // in s.routes, but for which readLoop has started. 817 s.grTmpClients = make(map[uint64]*client) 818 819 // For tracking routes and their remote ids 820 s.initRouteStructures(opts) 821 822 // For tracking leaf nodes. 823 s.leafs = make(map[uint64]*client) 824 825 // Used to kick out all go routines possibly waiting on server 826 // to shutdown. 827 s.quitCh = make(chan struct{}) 828 829 // Closed when startup is complete. ReadyForConnections() will block on 830 // this before checking the presence of listening sockets. 831 s.startupComplete = make(chan struct{}) 832 833 // Closed when Shutdown() is complete. Allows WaitForShutdown() to block 834 // waiting for complete shutdown. 835 s.shutdownComplete = make(chan struct{}) 836 837 // Check for configured account resolvers. 838 if err := s.configureResolver(); err != nil { 839 return nil, err 840 } 841 // If there is an URL account resolver, do basic test to see if anyone is home. 
842 if ar := opts.AccountResolver; ar != nil { 843 if ur, ok := ar.(*URLAccResolver); ok { 844 if _, err := ur.Fetch(_EMPTY_); err != nil { 845 return nil, err 846 } 847 } 848 } 849 // For other resolver: 850 // In operator mode, when the account resolver depends on an external system and 851 // the system account can't be fetched, inject a temporary one. 852 if ar := s.accResolver; len(opts.TrustedOperators) == 1 && ar != nil && 853 opts.SystemAccount != _EMPTY_ && opts.SystemAccount != DEFAULT_SYSTEM_ACCOUNT { 854 if _, ok := ar.(*MemAccResolver); !ok { 855 s.mu.Unlock() 856 var a *Account 857 // perform direct lookup to avoid warning trace 858 if _, err := fetchAccount(ar, opts.SystemAccount); err == nil { 859 a, _ = s.lookupAccount(opts.SystemAccount) 860 } 861 s.mu.Lock() 862 if a == nil { 863 sac := NewAccount(opts.SystemAccount) 864 sac.Issuer = opts.TrustedOperators[0].Issuer 865 sac.signingKeys = map[string]jwt.Scope{} 866 sac.signingKeys[opts.SystemAccount] = nil 867 s.registerAccountNoLock(sac) 868 } 869 } 870 } 871 872 // For tracking accounts 873 if _, err := s.configureAccounts(false); err != nil { 874 return nil, err 875 } 876 877 // Used to setup Authorization. 878 s.configureAuthorization() 879 880 // Start signal handler 881 s.handleSignals() 882 883 return s, nil 884 } 885 886 // Initializes route structures based on pooling and/or per-account routes. 887 // 888 // Server lock is held on entry 889 func (s *Server) initRouteStructures(opts *Options) { 890 s.routes = make(map[string][]*client) 891 if ps := opts.Cluster.PoolSize; ps > 0 { 892 s.routesPoolSize = ps 893 } else { 894 s.routesPoolSize = 1 895 } 896 // If we have per-account routes, we create accRoutes and initialize it 897 // with nil values. The presence of an account as the key will allow us 898 // to know if a given account is supposed to have dedicated routes. 
899 if l := len(opts.Cluster.PinnedAccounts); l > 0 { 900 s.accRoutes = make(map[string]map[string]*client, l) 901 for _, acc := range opts.Cluster.PinnedAccounts { 902 s.accRoutes[acc] = make(map[string]*client) 903 } 904 } 905 } 906 907 func (s *Server) logRejectedTLSConns() { 908 defer s.grWG.Done() 909 t := time.NewTicker(time.Second) 910 defer t.Stop() 911 for { 912 select { 913 case <-s.quitCh: 914 return 915 case <-t.C: 916 blocked := s.connRateCounter.countBlocked() 917 if blocked > 0 { 918 s.Warnf("Rejected %d connections due to TLS rate limiting", blocked) 919 } 920 } 921 } 922 } 923 924 // clusterName returns our cluster name which could be dynamic. 925 func (s *Server) ClusterName() string { 926 s.mu.RLock() 927 cn := s.info.Cluster 928 s.mu.RUnlock() 929 return cn 930 } 931 932 // Grabs cluster name with cluster name specific lock. 933 func (s *Server) cachedClusterName() string { 934 s.cnMu.RLock() 935 cn := s.cn 936 s.cnMu.RUnlock() 937 return cn 938 } 939 940 // setClusterName will update the cluster name for this server. 941 func (s *Server) setClusterName(name string) { 942 s.mu.Lock() 943 var resetCh chan struct{} 944 if s.sys != nil && s.info.Cluster != name { 945 // can't hold the lock as go routine reading it may be waiting for lock as well 946 resetCh = s.sys.resetCh 947 } 948 s.info.Cluster = name 949 s.routeInfo.Cluster = name 950 951 // Need to close solicited leaf nodes. The close has to be done outside of the server lock. 952 var leafs []*client 953 for _, c := range s.leafs { 954 c.mu.Lock() 955 if c.leaf != nil && c.leaf.remote != nil { 956 leafs = append(leafs, c) 957 } 958 c.mu.Unlock() 959 } 960 s.mu.Unlock() 961 962 // Also place into mapping cn with cnMu lock. 
963 s.cnMu.Lock() 964 s.cn = name 965 s.cnMu.Unlock() 966 967 for _, l := range leafs { 968 l.closeConnection(ClusterNameConflict) 969 } 970 if resetCh != nil { 971 resetCh <- struct{}{} 972 } 973 s.Noticef("Cluster name updated to %s", name) 974 } 975 976 // Return whether the cluster name is dynamic. 977 func (s *Server) isClusterNameDynamic() bool { 978 return s.getOpts().Cluster.Name == _EMPTY_ 979 } 980 981 // Returns our configured serverName. 982 func (s *Server) serverName() string { 983 return s.getOpts().ServerName 984 } 985 986 // ClientURL returns the URL used to connect clients. Helpful in testing 987 // when we designate a random client port (-1). 988 func (s *Server) ClientURL() string { 989 // FIXME(dlc) - should we add in user and pass if defined single? 990 opts := s.getOpts() 991 scheme := "nats://" 992 if opts.TLSConfig != nil { 993 scheme = "tls://" 994 } 995 return fmt.Sprintf("%s%s:%d", scheme, opts.Host, opts.Port) 996 } 997 998 func validateCluster(o *Options) error { 999 if o.Cluster.Compression.Mode != _EMPTY_ { 1000 if err := validateAndNormalizeCompressionOption(&o.Cluster.Compression, CompressionS2Fast); err != nil { 1001 return err 1002 } 1003 } 1004 if err := validatePinnedCerts(o.Cluster.TLSPinnedCerts); err != nil { 1005 return fmt.Errorf("cluster: %v", err) 1006 } 1007 // Check that cluster name if defined matches any gateway name. 1008 if o.Gateway.Name != "" && o.Gateway.Name != o.Cluster.Name { 1009 if o.Cluster.Name != "" { 1010 return ErrClusterNameConfigConflict 1011 } 1012 // Set this here so we do not consider it dynamic. 
1013 o.Cluster.Name = o.Gateway.Name 1014 } 1015 if l := len(o.Cluster.PinnedAccounts); l > 0 { 1016 if o.Cluster.PoolSize < 0 { 1017 return fmt.Errorf("pool_size cannot be negative if pinned accounts are specified") 1018 } 1019 m := make(map[string]struct{}, l) 1020 for _, a := range o.Cluster.PinnedAccounts { 1021 if _, exists := m[a]; exists { 1022 return fmt.Errorf("found duplicate account name %q in pinned accounts list %q", a, o.Cluster.PinnedAccounts) 1023 } 1024 m[a] = struct{}{} 1025 } 1026 } 1027 return nil 1028 } 1029 1030 func validatePinnedCerts(pinned PinnedCertSet) error { 1031 re := regexp.MustCompile("^[a-f0-9]{64}$") 1032 for certId := range pinned { 1033 entry := strings.ToLower(certId) 1034 if !re.MatchString(entry) { 1035 return fmt.Errorf("error parsing 'pinned_certs' key %s does not look like lower case hex-encoded sha256 of DER encoded SubjectPublicKeyInfo", entry) 1036 } 1037 } 1038 return nil 1039 } 1040 1041 func validateOptions(o *Options) error { 1042 if o.LameDuckDuration > 0 && o.LameDuckGracePeriod >= o.LameDuckDuration { 1043 return fmt.Errorf("lame duck grace period (%v) should be strictly lower than lame duck duration (%v)", 1044 o.LameDuckGracePeriod, o.LameDuckDuration) 1045 } 1046 if int64(o.MaxPayload) > o.MaxPending { 1047 return fmt.Errorf("max_payload (%v) cannot be higher than max_pending (%v)", 1048 o.MaxPayload, o.MaxPending) 1049 } 1050 // Check that the trust configuration is correct. 1051 if err := validateTrustedOperators(o); err != nil { 1052 return err 1053 } 1054 // Check on leaf nodes which will require a system 1055 // account when gateways are also configured. 1056 if err := validateLeafNode(o); err != nil { 1057 return err 1058 } 1059 // Check that authentication is properly configured. 1060 if err := validateAuth(o); err != nil { 1061 return err 1062 } 1063 // Check that gateway is properly configured. Returns no error 1064 // if there is no gateway defined. 
1065 if err := validateGatewayOptions(o); err != nil { 1066 return err 1067 } 1068 // Check that cluster name if defined matches any gateway name. 1069 if err := validateCluster(o); err != nil { 1070 return err 1071 } 1072 if err := validateMQTTOptions(o); err != nil { 1073 return err 1074 } 1075 if err := validateJetStreamOptions(o); err != nil { 1076 return err 1077 } 1078 // Finally check websocket options. 1079 return validateWebsocketOptions(o) 1080 } 1081 1082 func (s *Server) getOpts() *Options { 1083 s.optsMu.RLock() 1084 opts := s.opts 1085 s.optsMu.RUnlock() 1086 return opts 1087 } 1088 1089 func (s *Server) setOpts(opts *Options) { 1090 s.optsMu.Lock() 1091 s.opts = opts 1092 s.optsMu.Unlock() 1093 } 1094 1095 func (s *Server) globalAccount() *Account { 1096 s.mu.RLock() 1097 gacc := s.gacc 1098 s.mu.RUnlock() 1099 return gacc 1100 } 1101 1102 // Used to setup or update Accounts. 1103 // Returns a map that indicates which accounts have had their stream imports 1104 // changed (in case of an update in configuration reload). 1105 // Lock is held upon entry, but will be released/reacquired in this function. 1106 func (s *Server) configureAccounts(reloading bool) (map[string]struct{}, error) { 1107 awcsti := make(map[string]struct{}) 1108 1109 // Create the global account. 1110 if s.gacc == nil { 1111 s.gacc = NewAccount(globalAccountName) 1112 s.registerAccountNoLock(s.gacc) 1113 } 1114 1115 opts := s.getOpts() 1116 1117 // We need to track service imports since we can not swap them out (unsub and re-sub) 1118 // until the proper server struct accounts have been swapped in properly. Doing it in 1119 // place could lead to data loss or server panic since account under new si has no real 1120 // account and hence no sublist, so will panic on inbound message. 1121 siMap := make(map[*Account][][]byte) 1122 1123 // Check opts and walk through them. We need to copy them here 1124 // so that we do not keep a real one sitting in the options. 
1125 for _, acc := range opts.Accounts { 1126 var a *Account 1127 create := true 1128 // For the global account, we want to skip the reload process 1129 // and fall back into the "create" case which will in that 1130 // case really be just an update (shallowCopy will make sure 1131 // that mappings are copied over). 1132 if reloading && acc.Name != globalAccountName { 1133 if ai, ok := s.accounts.Load(acc.Name); ok { 1134 a = ai.(*Account) 1135 a.mu.Lock() 1136 // Before updating the account, check if stream imports have changed. 1137 if !a.checkStreamImportsEqual(acc) { 1138 awcsti[acc.Name] = struct{}{} 1139 } 1140 // Collect the sids for the service imports since we are going to 1141 // replace with new ones. 1142 var sids [][]byte 1143 for _, si := range a.imports.services { 1144 if si.sid != nil { 1145 sids = append(sids, si.sid) 1146 } 1147 } 1148 // Setup to process later if needed. 1149 if len(sids) > 0 || len(acc.imports.services) > 0 { 1150 siMap[a] = sids 1151 } 1152 1153 // Now reset all export/imports fields since they are going to be 1154 // filled in shallowCopy() 1155 a.imports.streams, a.imports.services = nil, nil 1156 a.exports.streams, a.exports.services = nil, nil 1157 // We call shallowCopy from the account `acc` (the one in Options) 1158 // and pass `a` (our existing account) to get it updated. 1159 acc.shallowCopy(a) 1160 a.mu.Unlock() 1161 create = false 1162 } 1163 } 1164 // Track old mappings if global account. 1165 var oldGMappings []*mapping 1166 if create { 1167 if acc.Name == globalAccountName { 1168 a = s.gacc 1169 a.mu.Lock() 1170 oldGMappings = append(oldGMappings, a.mappings...) 1171 a.mu.Unlock() 1172 } else { 1173 a = NewAccount(acc.Name) 1174 } 1175 // Locking matters in the case of an update of the global account 1176 a.mu.Lock() 1177 acc.shallowCopy(a) 1178 a.mu.Unlock() 1179 // Will be a no-op in case of the global account since it is already registered. 
1180 s.registerAccountNoLock(a) 1181 } 1182 1183 // The `acc` account is stored in options, not in the server, and these can be cleared. 1184 acc.sl, acc.clients, acc.mappings = nil, nil, nil 1185 1186 // Check here if we have been reloaded and we have a global account with mappings that may have changed. 1187 // If we have leafnodes they need to be updated. 1188 if reloading && a == s.gacc { 1189 a.mu.Lock() 1190 mappings := make(map[string]*mapping) 1191 if len(a.mappings) > 0 && a.nleafs > 0 { 1192 for _, em := range a.mappings { 1193 mappings[em.src] = em 1194 } 1195 } 1196 a.mu.Unlock() 1197 if len(mappings) > 0 || len(oldGMappings) > 0 { 1198 a.lmu.RLock() 1199 for _, lc := range a.lleafs { 1200 for _, em := range mappings { 1201 lc.forceAddToSmap(em.src) 1202 } 1203 // Remove any old ones if needed. 1204 for _, em := range oldGMappings { 1205 // Only remove if not in the new ones. 1206 if _, ok := mappings[em.src]; !ok { 1207 lc.forceRemoveFromSmap(em.src) 1208 } 1209 } 1210 } 1211 a.lmu.RUnlock() 1212 } 1213 } 1214 1215 // If we see an account defined using $SYS we will make sure that is set as system account. 1216 if acc.Name == DEFAULT_SYSTEM_ACCOUNT && opts.SystemAccount == _EMPTY_ { 1217 opts.SystemAccount = DEFAULT_SYSTEM_ACCOUNT 1218 } 1219 } 1220 1221 // Now that we have this we need to remap any referenced accounts in 1222 // import or export maps to the new ones. 
1223 swapApproved := func(ea *exportAuth) { 1224 for sub, a := range ea.approved { 1225 var acc *Account 1226 if v, ok := s.accounts.Load(a.Name); ok { 1227 acc = v.(*Account) 1228 } 1229 ea.approved[sub] = acc 1230 } 1231 } 1232 var numAccounts int 1233 s.accounts.Range(func(k, v any) bool { 1234 numAccounts++ 1235 acc := v.(*Account) 1236 acc.mu.Lock() 1237 // Exports 1238 for _, se := range acc.exports.streams { 1239 if se != nil { 1240 swapApproved(&se.exportAuth) 1241 } 1242 } 1243 for _, se := range acc.exports.services { 1244 if se != nil { 1245 // Swap over the bound account for service exports. 1246 if se.acc != nil { 1247 if v, ok := s.accounts.Load(se.acc.Name); ok { 1248 se.acc = v.(*Account) 1249 } 1250 } 1251 swapApproved(&se.exportAuth) 1252 } 1253 } 1254 // Imports 1255 for _, si := range acc.imports.streams { 1256 if v, ok := s.accounts.Load(si.acc.Name); ok { 1257 si.acc = v.(*Account) 1258 } 1259 } 1260 for _, si := range acc.imports.services { 1261 if v, ok := s.accounts.Load(si.acc.Name); ok { 1262 si.acc = v.(*Account) 1263 1264 // It is possible to allow for latency tracking inside your 1265 // own account, so lock only when not the same account. 1266 if si.acc == acc { 1267 si.se = si.acc.getServiceExport(si.to) 1268 continue 1269 } 1270 si.acc.mu.RLock() 1271 si.se = si.acc.getServiceExport(si.to) 1272 si.acc.mu.RUnlock() 1273 } 1274 } 1275 // Make sure the subs are running, but only if not reloading. 1276 if len(acc.imports.services) > 0 && acc.ic == nil && !reloading { 1277 acc.ic = s.createInternalAccountClient() 1278 acc.ic.acc = acc 1279 // Need to release locks to invoke this function. 1280 acc.mu.Unlock() 1281 s.mu.Unlock() 1282 acc.addAllServiceImportSubs() 1283 s.mu.Lock() 1284 acc.mu.Lock() 1285 } 1286 acc.updated = time.Now() 1287 acc.mu.Unlock() 1288 return true 1289 }) 1290 1291 // Check if we need to process service imports pending from above. 1292 // This processing needs to be after we swap in the real accounts above. 
1293 for acc, sids := range siMap { 1294 c := acc.ic 1295 for _, sid := range sids { 1296 c.processUnsub(sid) 1297 } 1298 acc.addAllServiceImportSubs() 1299 s.mu.Unlock() 1300 s.registerSystemImports(acc) 1301 s.mu.Lock() 1302 } 1303 1304 // Set the system account if it was configured. 1305 // Otherwise create a default one. 1306 if opts.SystemAccount != _EMPTY_ { 1307 // Lock may be acquired in lookupAccount, so release to call lookupAccount. 1308 s.mu.Unlock() 1309 acc, err := s.lookupAccount(opts.SystemAccount) 1310 s.mu.Lock() 1311 if err == nil && s.sys != nil && acc != s.sys.account { 1312 // sys.account.clients (including internal client)/respmap/etc... are transferred separately 1313 s.sys.account = acc 1314 } 1315 if err != nil { 1316 return awcsti, fmt.Errorf("error resolving system account: %v", err) 1317 } 1318 1319 // If we have defined a system account here check to see if its just us and the $G account. 1320 // We would do this to add user/pass to the system account. If this is the case add in 1321 // no-auth-user for $G. 1322 // Only do this if non-operator mode and we did not have an authorization block defined. 1323 if len(opts.TrustedOperators) == 0 && numAccounts == 2 && opts.NoAuthUser == _EMPTY_ && !opts.authBlockDefined { 1324 // If we come here from config reload, let's not recreate the fake user name otherwise 1325 // it will cause currently clients to be disconnected. 1326 uname := s.sysAccOnlyNoAuthUser 1327 if uname == _EMPTY_ { 1328 // Create a unique name so we do not collide. 1329 var b [8]byte 1330 rn := rand.Int63() 1331 for i, l := 0, rn; i < len(b); i++ { 1332 b[i] = digits[l%base] 1333 l /= base 1334 } 1335 uname = fmt.Sprintf("nats-%s", b[:]) 1336 s.sysAccOnlyNoAuthUser = uname 1337 } 1338 opts.Users = append(opts.Users, &User{Username: uname, Password: uname[6:], Account: s.gacc}) 1339 opts.NoAuthUser = uname 1340 } 1341 } 1342 1343 // Add any required exports from system account. 
1344 if s.sys != nil { 1345 s.mu.Unlock() 1346 s.addSystemAccountExports(s.sys.account) 1347 s.mu.Lock() 1348 } 1349 1350 return awcsti, nil 1351 } 1352 1353 // Setup the account resolver. For memory resolver, make sure the JWTs are 1354 // properly formed but do not enforce expiration etc. 1355 // Lock is held on entry, but may be released/reacquired during this call. 1356 func (s *Server) configureResolver() error { 1357 opts := s.getOpts() 1358 s.accResolver = opts.AccountResolver 1359 if opts.AccountResolver != nil { 1360 // For URL resolver, set the TLSConfig if specified. 1361 if opts.AccountResolverTLSConfig != nil { 1362 if ar, ok := opts.AccountResolver.(*URLAccResolver); ok { 1363 if t, ok := ar.c.Transport.(*http.Transport); ok { 1364 t.CloseIdleConnections() 1365 t.TLSClientConfig = opts.AccountResolverTLSConfig.Clone() 1366 } 1367 } 1368 } 1369 if len(opts.resolverPreloads) > 0 { 1370 // Lock ordering is account resolver -> server, so we need to release 1371 // the lock and reacquire it when done with account resolver's calls. 1372 ar := s.accResolver 1373 s.mu.Unlock() 1374 defer s.mu.Lock() 1375 if ar.IsReadOnly() { 1376 return fmt.Errorf("resolver preloads only available for writeable resolver types MEM/DIR/CACHE_DIR") 1377 } 1378 for k, v := range opts.resolverPreloads { 1379 _, err := jwt.DecodeAccountClaims(v) 1380 if err != nil { 1381 return fmt.Errorf("preload account error for %q: %v", k, err) 1382 } 1383 ar.Store(k, v) 1384 } 1385 } 1386 } 1387 return nil 1388 } 1389 1390 // This will check preloads for validation issues. 1391 func (s *Server) checkResolvePreloads() { 1392 opts := s.getOpts() 1393 // We can just check the read-only opts versions here, that way we do not need 1394 // to grab server lock or access s.accResolver. 
1395 for k, v := range opts.resolverPreloads { 1396 claims, err := jwt.DecodeAccountClaims(v) 1397 if err != nil { 1398 s.Errorf("Preloaded account [%s] not valid", k) 1399 continue 1400 } 1401 // Check if it is expired. 1402 vr := jwt.CreateValidationResults() 1403 claims.Validate(vr) 1404 if vr.IsBlocking(true) { 1405 s.Warnf("Account [%s] has validation issues:", k) 1406 for _, v := range vr.Issues { 1407 s.Warnf(" - %s", v.Description) 1408 } 1409 } 1410 } 1411 } 1412 1413 // Determines if we are in pre NATS 2.0 setup with no accounts. 1414 func (s *Server) globalAccountOnly() bool { 1415 var hasOthers bool 1416 1417 if s.trustedKeys != nil { 1418 return false 1419 } 1420 1421 s.mu.RLock() 1422 s.accounts.Range(func(k, v any) bool { 1423 acc := v.(*Account) 1424 // Ignore global and system 1425 if acc == s.gacc || (s.sys != nil && acc == s.sys.account) { 1426 return true 1427 } 1428 hasOthers = true 1429 return false 1430 }) 1431 s.mu.RUnlock() 1432 1433 return !hasOthers 1434 } 1435 1436 // Determines if this server is in standalone mode, meaning no routes or gateways. 1437 func (s *Server) standAloneMode() bool { 1438 opts := s.getOpts() 1439 return opts.Cluster.Port == 0 && opts.Gateway.Port == 0 1440 } 1441 1442 func (s *Server) configuredRoutes() int { 1443 return len(s.getOpts().Routes) 1444 } 1445 1446 // activePeers is used in bootstrapping raft groups like the JetStream meta controller. 1447 func (s *Server) ActivePeers() (peers []string) { 1448 s.nodeToInfo.Range(func(k, v any) bool { 1449 si := v.(nodeInfo) 1450 if !si.offline { 1451 peers = append(peers, k.(string)) 1452 } 1453 return true 1454 }) 1455 return peers 1456 } 1457 1458 // isTrustedIssuer will check that the issuer is a trusted public key. 1459 // This is used to make sure an account was signed by a trusted operator. 
1460 func (s *Server) isTrustedIssuer(issuer string) bool { 1461 s.mu.RLock() 1462 defer s.mu.RUnlock() 1463 // If we are not running in trusted mode and there is no issuer, that is ok. 1464 if s.trustedKeys == nil && issuer == _EMPTY_ { 1465 return true 1466 } 1467 for _, tk := range s.trustedKeys { 1468 if tk == issuer { 1469 return true 1470 } 1471 } 1472 return false 1473 } 1474 1475 // processTrustedKeys will process binary stamped and 1476 // options-based trusted nkeys. Returns success. 1477 func (s *Server) processTrustedKeys() bool { 1478 s.strictSigningKeyUsage = map[string]struct{}{} 1479 opts := s.getOpts() 1480 if trustedKeys != _EMPTY_ && !s.initStampedTrustedKeys() { 1481 return false 1482 } else if opts.TrustedKeys != nil { 1483 for _, key := range opts.TrustedKeys { 1484 if !nkeys.IsValidPublicOperatorKey(key) { 1485 return false 1486 } 1487 } 1488 s.trustedKeys = append([]string(nil), opts.TrustedKeys...) 1489 for _, claim := range opts.TrustedOperators { 1490 if !claim.StrictSigningKeyUsage { 1491 continue 1492 } 1493 for _, key := range claim.SigningKeys { 1494 s.strictSigningKeyUsage[key] = struct{}{} 1495 } 1496 } 1497 } 1498 return true 1499 } 1500 1501 // checkTrustedKeyString will check that the string is a valid array 1502 // of public operator nkeys. 1503 func checkTrustedKeyString(keys string) []string { 1504 tks := strings.Fields(keys) 1505 if len(tks) == 0 { 1506 return nil 1507 } 1508 // Walk all the keys and make sure they are valid. 1509 for _, key := range tks { 1510 if !nkeys.IsValidPublicOperatorKey(key) { 1511 return nil 1512 } 1513 } 1514 return tks 1515 } 1516 1517 // initStampedTrustedKeys will check the stamped trusted keys 1518 // and will set the server field 'trustedKeys'. Returns whether 1519 // it succeeded or not. 1520 func (s *Server) initStampedTrustedKeys() bool { 1521 // Check to see if we have an override in options, which will cause us to fail. 
1522 if len(s.getOpts().TrustedKeys) > 0 { 1523 return false 1524 } 1525 tks := checkTrustedKeyString(trustedKeys) 1526 if len(tks) == 0 { 1527 return false 1528 } 1529 s.trustedKeys = tks 1530 return true 1531 } 1532 1533 // PrintAndDie is exported for access in other packages. 1534 func PrintAndDie(msg string) { 1535 fmt.Fprintln(os.Stderr, msg) 1536 os.Exit(1) 1537 } 1538 1539 // PrintServerAndExit will print our version and exit. 1540 func PrintServerAndExit() { 1541 fmt.Printf("nats-server: v%s\n", VERSION) 1542 os.Exit(0) 1543 } 1544 1545 // ProcessCommandLineArgs takes the command line arguments 1546 // validating and setting flags for handling in case any 1547 // sub command was present. 1548 func ProcessCommandLineArgs(cmd *flag.FlagSet) (showVersion bool, showHelp bool, err error) { 1549 if len(cmd.Args()) > 0 { 1550 arg := cmd.Args()[0] 1551 switch strings.ToLower(arg) { 1552 case "version": 1553 return true, false, nil 1554 case "help": 1555 return false, true, nil 1556 default: 1557 return false, false, fmt.Errorf("unrecognized command: %q", arg) 1558 } 1559 } 1560 1561 return false, false, nil 1562 } 1563 1564 // Public version. 1565 func (s *Server) Running() bool { 1566 return s.isRunning() 1567 } 1568 1569 // Protected check on running state 1570 func (s *Server) isRunning() bool { 1571 return s.running.Load() 1572 } 1573 1574 func (s *Server) logPid() error { 1575 pidStr := strconv.Itoa(os.Getpid()) 1576 return os.WriteFile(s.getOpts().PidFile, []byte(pidStr), 0660) 1577 } 1578 1579 // numReservedAccounts will return the number of reserved accounts configured in the server. 1580 // Currently this is 1, one for the global default account. 1581 func (s *Server) numReservedAccounts() int { 1582 return 1 1583 } 1584 1585 // NumActiveAccounts reports number of active accounts on this server. 1586 func (s *Server) NumActiveAccounts() int32 { 1587 return atomic.LoadInt32(&s.activeAccounts) 1588 } 1589 1590 // incActiveAccounts() just adds one under lock. 
1591 func (s *Server) incActiveAccounts() { 1592 atomic.AddInt32(&s.activeAccounts, 1) 1593 } 1594 1595 // decActiveAccounts() just subtracts one under lock. 1596 func (s *Server) decActiveAccounts() { 1597 atomic.AddInt32(&s.activeAccounts, -1) 1598 } 1599 1600 // This should be used for testing only. Will be slow since we have to 1601 // range over all accounts in the sync.Map to count. 1602 func (s *Server) numAccounts() int { 1603 count := 0 1604 s.mu.RLock() 1605 s.accounts.Range(func(k, v any) bool { 1606 count++ 1607 return true 1608 }) 1609 s.mu.RUnlock() 1610 return count 1611 } 1612 1613 // NumLoadedAccounts returns the number of loaded accounts. 1614 func (s *Server) NumLoadedAccounts() int { 1615 return s.numAccounts() 1616 } 1617 1618 // LookupOrRegisterAccount will return the given account if known or create a new entry. 1619 func (s *Server) LookupOrRegisterAccount(name string) (account *Account, isNew bool) { 1620 s.mu.Lock() 1621 defer s.mu.Unlock() 1622 if v, ok := s.accounts.Load(name); ok { 1623 return v.(*Account), false 1624 } 1625 acc := NewAccount(name) 1626 s.registerAccountNoLock(acc) 1627 return acc, true 1628 } 1629 1630 // RegisterAccount will register an account. The account must be new 1631 // or this call will fail. 1632 func (s *Server) RegisterAccount(name string) (*Account, error) { 1633 s.mu.Lock() 1634 defer s.mu.Unlock() 1635 if _, ok := s.accounts.Load(name); ok { 1636 return nil, ErrAccountExists 1637 } 1638 acc := NewAccount(name) 1639 s.registerAccountNoLock(acc) 1640 return acc, nil 1641 } 1642 1643 // SetSystemAccount will set the internal system account. 1644 // If root operators are present it will also check validity. 1645 func (s *Server) SetSystemAccount(accName string) error { 1646 // Lookup from sync.Map first. 1647 if v, ok := s.accounts.Load(accName); ok { 1648 return s.setSystemAccount(v.(*Account)) 1649 } 1650 1651 // If we are here we do not have local knowledge of this account. 
1652 // Do this one by hand to return more useful error. 1653 ac, jwt, err := s.fetchAccountClaims(accName) 1654 if err != nil { 1655 return err 1656 } 1657 acc := s.buildInternalAccount(ac) 1658 acc.claimJWT = jwt 1659 // Due to race, we need to make sure that we are not 1660 // registering twice. 1661 if racc := s.registerAccount(acc); racc != nil { 1662 return nil 1663 } 1664 return s.setSystemAccount(acc) 1665 } 1666 1667 // SystemAccount returns the system account if set. 1668 func (s *Server) SystemAccount() *Account { 1669 var sacc *Account 1670 s.mu.RLock() 1671 if s.sys != nil { 1672 sacc = s.sys.account 1673 } 1674 s.mu.RUnlock() 1675 return sacc 1676 } 1677 1678 // GlobalAccount returns the global account. 1679 // Default clients will use the global account. 1680 func (s *Server) GlobalAccount() *Account { 1681 s.mu.RLock() 1682 defer s.mu.RUnlock() 1683 return s.gacc 1684 } 1685 1686 // SetDefaultSystemAccount will create a default system account if one is not present. 1687 func (s *Server) SetDefaultSystemAccount() error { 1688 if _, isNew := s.LookupOrRegisterAccount(DEFAULT_SYSTEM_ACCOUNT); !isNew { 1689 return nil 1690 } 1691 s.Debugf("Created system account: %q", DEFAULT_SYSTEM_ACCOUNT) 1692 return s.SetSystemAccount(DEFAULT_SYSTEM_ACCOUNT) 1693 } 1694 1695 // Assign a system account. Should only be called once. 1696 // This sets up a server to send and receive messages from 1697 // inside the server itself. 1698 func (s *Server) setSystemAccount(acc *Account) error { 1699 if acc == nil { 1700 return ErrMissingAccount 1701 } 1702 // Don't try to fix this here. 1703 if acc.IsExpired() { 1704 return ErrAccountExpired 1705 } 1706 // If we are running with trusted keys for an operator 1707 // make sure we check the account is legit. 
1708 if !s.isTrustedIssuer(acc.Issuer) { 1709 return ErrAccountValidation 1710 } 1711 1712 s.mu.Lock() 1713 1714 if s.sys != nil { 1715 s.mu.Unlock() 1716 return ErrAccountExists 1717 } 1718 1719 // This is here in an attempt to quiet the race detector and not have to place 1720 // locks on fast path for inbound messages and checking service imports. 1721 acc.mu.Lock() 1722 if acc.imports.services == nil { 1723 acc.imports.services = make(map[string]*serviceImport) 1724 } 1725 acc.mu.Unlock() 1726 1727 s.sys = &internal{ 1728 account: acc, 1729 client: s.createInternalSystemClient(), 1730 seq: 1, 1731 sid: 1, 1732 servers: make(map[string]*serverUpdate), 1733 replies: make(map[string]msgHandler), 1734 sendq: newIPQueue[*pubMsg](s, "System sendQ"), 1735 recvq: newIPQueue[*inSysMsg](s, "System recvQ"), 1736 resetCh: make(chan struct{}), 1737 sq: s.newSendQ(), 1738 statsz: eventsHBInterval, 1739 orphMax: 5 * eventsHBInterval, 1740 chkOrph: 3 * eventsHBInterval, 1741 } 1742 s.sys.wg.Add(1) 1743 s.mu.Unlock() 1744 1745 // Register with the account. 1746 s.sys.client.registerWithAccount(acc) 1747 1748 s.addSystemAccountExports(acc) 1749 1750 // Start our internal loop to serialize outbound messages. 1751 // We do our own wg here since we will stop first during shutdown. 1752 go s.internalSendLoop(&s.sys.wg) 1753 1754 // Start the internal loop for inbound messages. 1755 go s.internalReceiveLoop() 1756 1757 // Start up our general subscriptions 1758 s.initEventTracking() 1759 1760 // Track for dead remote servers. 1761 s.wrapChk(s.startRemoteServerSweepTimer)() 1762 1763 // Send out statsz updates periodically. 1764 s.wrapChk(s.startStatszTimer)() 1765 1766 // If we have existing accounts make sure we enable account tracking. 1767 s.mu.Lock() 1768 s.accounts.Range(func(k, v any) bool { 1769 acc := v.(*Account) 1770 s.enableAccountTracking(acc) 1771 return true 1772 }) 1773 s.mu.Unlock() 1774 1775 return nil 1776 } 1777 1778 // Creates an internal system client. 
1779 func (s *Server) createInternalSystemClient() *client { 1780 return s.createInternalClient(SYSTEM) 1781 } 1782 1783 // Creates an internal jetstream client. 1784 func (s *Server) createInternalJetStreamClient() *client { 1785 return s.createInternalClient(JETSTREAM) 1786 } 1787 1788 // Creates an internal client for Account. 1789 func (s *Server) createInternalAccountClient() *client { 1790 return s.createInternalClient(ACCOUNT) 1791 } 1792 1793 // Internal clients. kind should be SYSTEM or JETSTREAM 1794 func (s *Server) createInternalClient(kind int) *client { 1795 if kind != SYSTEM && kind != JETSTREAM && kind != ACCOUNT { 1796 return nil 1797 } 1798 now := time.Now() 1799 c := &client{srv: s, kind: kind, opts: internalOpts, msubs: -1, mpay: -1, start: now, last: now} 1800 c.initClient() 1801 c.echo = false 1802 c.headers = true 1803 c.flags.set(noReconnect) 1804 return c 1805 } 1806 1807 // Determine if accounts should track subscriptions for 1808 // efficient propagation. 1809 // Lock should be held on entry. 1810 func (s *Server) shouldTrackSubscriptions() bool { 1811 opts := s.getOpts() 1812 return (opts.Cluster.Port != 0 || opts.Gateway.Port != 0) 1813 } 1814 1815 // Invokes registerAccountNoLock under the protection of the server lock. 1816 // That is, server lock is acquired/released in this function. 1817 // See registerAccountNoLock for comment on returned value. 1818 func (s *Server) registerAccount(acc *Account) *Account { 1819 s.mu.Lock() 1820 racc := s.registerAccountNoLock(acc) 1821 s.mu.Unlock() 1822 return racc 1823 } 1824 1825 // Helper to set the sublist based on preferences. 1826 func (s *Server) setAccountSublist(acc *Account) { 1827 if acc != nil && acc.sl == nil { 1828 opts := s.getOpts() 1829 if opts != nil && opts.NoSublistCache { 1830 acc.sl = NewSublistNoCache() 1831 } else { 1832 acc.sl = NewSublistWithCache() 1833 } 1834 } 1835 } 1836 1837 // Registers an account in the server. 
1838 // Due to some locking considerations, we may end-up trying 1839 // to register the same account twice. This function will 1840 // then return the already registered account. 1841 // Lock should be held on entry. 1842 func (s *Server) registerAccountNoLock(acc *Account) *Account { 1843 // We are under the server lock. Lookup from map, if present 1844 // return existing account. 1845 if a, _ := s.accounts.Load(acc.Name); a != nil { 1846 s.tmpAccounts.Delete(acc.Name) 1847 return a.(*Account) 1848 } 1849 // Finish account setup and store. 1850 s.setAccountSublist(acc) 1851 1852 acc.mu.Lock() 1853 s.setRouteInfo(acc) 1854 if acc.clients == nil { 1855 acc.clients = make(map[*client]struct{}) 1856 } 1857 1858 // If we are capable of routing we will track subscription 1859 // information for efficient interest propagation. 1860 // During config reload, it is possible that account was 1861 // already created (global account), so use locking and 1862 // make sure we create only if needed. 1863 // TODO(dlc)- Double check that we need this for GWs. 
1864 if acc.rm == nil && s.opts != nil && s.shouldTrackSubscriptions() { 1865 acc.rm = make(map[string]int32) 1866 acc.lqws = make(map[string]int32) 1867 } 1868 acc.srv = s 1869 acc.updated = time.Now() 1870 accName := acc.Name 1871 jsEnabled := len(acc.jsLimits) > 0 1872 acc.mu.Unlock() 1873 1874 if opts := s.getOpts(); opts != nil && len(opts.JsAccDefaultDomain) > 0 { 1875 if defDomain, ok := opts.JsAccDefaultDomain[accName]; ok { 1876 if jsEnabled { 1877 s.Warnf("Skipping Default Domain %q, set for JetStream enabled account %q", defDomain, accName) 1878 } else if defDomain != _EMPTY_ { 1879 for src, dest := range generateJSMappingTable(defDomain) { 1880 // flip src and dest around so the domain is inserted 1881 s.Noticef("Adding default domain mapping %q -> %q to account %q %p", dest, src, accName, acc) 1882 if err := acc.AddMapping(dest, src); err != nil { 1883 s.Errorf("Error adding JetStream default domain mapping: %v", err) 1884 } 1885 } 1886 } 1887 } 1888 } 1889 1890 s.accounts.Store(acc.Name, acc) 1891 s.tmpAccounts.Delete(acc.Name) 1892 s.enableAccountTracking(acc) 1893 1894 // Can not have server lock here. 1895 s.mu.Unlock() 1896 s.registerSystemImports(acc) 1897 // Starting 2.9.0, we are phasing out the optimistic mode, so change 1898 // the account to interest-only mode (except if instructed not to do 1899 // it in some tests). 1900 if s.gateway.enabled && !gwDoNotForceInterestOnlyMode { 1901 s.switchAccountToInterestMode(acc.GetName()) 1902 } 1903 s.mu.Lock() 1904 1905 return nil 1906 } 1907 1908 // Sets the account's routePoolIdx depending on presence or not of 1909 // pooling or per-account routes. Also updates a map used by 1910 // gateway code to retrieve a route based on some route hash. 1911 // 1912 // Both Server and Account lock held on entry. 
1913 func (s *Server) setRouteInfo(acc *Account) { 1914 // If there is a dedicated route configured for this account 1915 if _, ok := s.accRoutes[acc.Name]; ok { 1916 // We want the account name to be in the map, but we don't 1917 // need a value (we could store empty string) 1918 s.accRouteByHash.Store(acc.Name, nil) 1919 // Set the route pool index to -1 so that it is easy when 1920 // ranging over accounts to exclude those accounts when 1921 // trying to get accounts for a given pool index. 1922 acc.routePoolIdx = accDedicatedRoute 1923 } else { 1924 // If pool size more than 1, we will compute a hash code and 1925 // use modulo to assign to an index of the pool slice. For 1 1926 // and below, all accounts will be bound to the single connection 1927 // at index 0. 1928 acc.routePoolIdx = s.computeRoutePoolIdx(acc) 1929 if s.routesPoolSize > 1 { 1930 s.accRouteByHash.Store(acc.Name, acc.routePoolIdx) 1931 } 1932 } 1933 } 1934 1935 // Returns a route pool index for this account based on the given pool size. 1936 // Account lock is held on entry (account's name is accessed but immutable 1937 // so could be called without account's lock). 1938 // Server lock held on entry. 1939 func (s *Server) computeRoutePoolIdx(acc *Account) int { 1940 if s.routesPoolSize <= 1 { 1941 return 0 1942 } 1943 h := fnv.New32a() 1944 h.Write([]byte(acc.Name)) 1945 sum32 := h.Sum32() 1946 return int((sum32 % uint32(s.routesPoolSize))) 1947 } 1948 1949 // lookupAccount is a function to return the account structure 1950 // associated with an account name. 1951 // Lock MUST NOT be held upon entry. 1952 func (s *Server) lookupAccount(name string) (*Account, error) { 1953 var acc *Account 1954 if v, ok := s.accounts.Load(name); ok { 1955 acc = v.(*Account) 1956 } 1957 if acc != nil { 1958 // If we are expired and we have a resolver, then 1959 // return the latest information from the resolver. 
1960 if acc.IsExpired() { 1961 s.Debugf("Requested account [%s] has expired", name) 1962 if s.AccountResolver() != nil { 1963 if err := s.updateAccount(acc); err != nil { 1964 // This error could mask expired, so just return expired here. 1965 return nil, ErrAccountExpired 1966 } 1967 } else { 1968 return nil, ErrAccountExpired 1969 } 1970 } 1971 return acc, nil 1972 } 1973 // If we have a resolver see if it can fetch the account. 1974 if s.AccountResolver() == nil { 1975 return nil, ErrMissingAccount 1976 } 1977 return s.fetchAccount(name) 1978 } 1979 1980 // LookupAccount is a public function to return the account structure 1981 // associated with name. 1982 func (s *Server) LookupAccount(name string) (*Account, error) { 1983 return s.lookupAccount(name) 1984 } 1985 1986 // This will fetch new claims and if found update the account with new claims. 1987 // Lock MUST NOT be held upon entry. 1988 func (s *Server) updateAccount(acc *Account) error { 1989 acc.mu.RLock() 1990 // TODO(dlc) - Make configurable 1991 if !acc.incomplete && time.Since(acc.updated) < time.Second { 1992 acc.mu.RUnlock() 1993 s.Debugf("Requested account update for [%s] ignored, too soon", acc.Name) 1994 return ErrAccountResolverUpdateTooSoon 1995 } 1996 acc.mu.RUnlock() 1997 claimJWT, err := s.fetchRawAccountClaims(acc.Name) 1998 if err != nil { 1999 return err 2000 } 2001 return s.updateAccountWithClaimJWT(acc, claimJWT) 2002 } 2003 2004 // updateAccountWithClaimJWT will check and apply the claim update. 2005 // Lock MUST NOT be held upon entry. 
2006 func (s *Server) updateAccountWithClaimJWT(acc *Account, claimJWT string) error { 2007 if acc == nil { 2008 return ErrMissingAccount 2009 } 2010 acc.mu.RLock() 2011 sameClaim := acc.claimJWT != _EMPTY_ && acc.claimJWT == claimJWT && !acc.incomplete 2012 acc.mu.RUnlock() 2013 if sameClaim { 2014 s.Debugf("Requested account update for [%s], same claims detected", acc.Name) 2015 return nil 2016 } 2017 accClaims, _, err := s.verifyAccountClaims(claimJWT) 2018 if err == nil && accClaims != nil { 2019 acc.mu.Lock() 2020 if acc.Issuer == _EMPTY_ { 2021 acc.Issuer = accClaims.Issuer 2022 } 2023 if acc.Name != accClaims.Subject { 2024 acc.mu.Unlock() 2025 return ErrAccountValidation 2026 } 2027 acc.mu.Unlock() 2028 s.UpdateAccountClaims(acc, accClaims) 2029 acc.mu.Lock() 2030 // needs to be set after update completed. 2031 // This causes concurrent calls to return with sameClaim=true if the change is effective. 2032 acc.claimJWT = claimJWT 2033 acc.mu.Unlock() 2034 return nil 2035 } 2036 return err 2037 } 2038 2039 // fetchRawAccountClaims will grab raw account claims iff we have a resolver. 2040 // Lock is NOT held upon entry. 2041 func (s *Server) fetchRawAccountClaims(name string) (string, error) { 2042 accResolver := s.AccountResolver() 2043 if accResolver == nil { 2044 return _EMPTY_, ErrNoAccountResolver 2045 } 2046 // Need to do actual Fetch 2047 start := time.Now() 2048 claimJWT, err := fetchAccount(accResolver, name) 2049 fetchTime := time.Since(start) 2050 if fetchTime > time.Second { 2051 s.Warnf("Account [%s] fetch took %v", name, fetchTime) 2052 } else { 2053 s.Debugf("Account [%s] fetch took %v", name, fetchTime) 2054 } 2055 if err != nil { 2056 s.Warnf("Account fetch failed: %v", err) 2057 return "", err 2058 } 2059 return claimJWT, nil 2060 } 2061 2062 // fetchAccountClaims will attempt to fetch new claims if a resolver is present. 2063 // Lock is NOT held upon entry. 
2064 func (s *Server) fetchAccountClaims(name string) (*jwt.AccountClaims, string, error) { 2065 claimJWT, err := s.fetchRawAccountClaims(name) 2066 if err != nil { 2067 return nil, _EMPTY_, err 2068 } 2069 var claim *jwt.AccountClaims 2070 claim, claimJWT, err = s.verifyAccountClaims(claimJWT) 2071 if claim != nil && claim.Subject != name { 2072 return nil, _EMPTY_, ErrAccountValidation 2073 } 2074 return claim, claimJWT, err 2075 } 2076 2077 // verifyAccountClaims will decode and validate any account claims. 2078 func (s *Server) verifyAccountClaims(claimJWT string) (*jwt.AccountClaims, string, error) { 2079 accClaims, err := jwt.DecodeAccountClaims(claimJWT) 2080 if err != nil { 2081 return nil, _EMPTY_, err 2082 } 2083 if !s.isTrustedIssuer(accClaims.Issuer) { 2084 return nil, _EMPTY_, ErrAccountValidation 2085 } 2086 vr := jwt.CreateValidationResults() 2087 accClaims.Validate(vr) 2088 if vr.IsBlocking(true) { 2089 return nil, _EMPTY_, ErrAccountValidation 2090 } 2091 return accClaims, claimJWT, nil 2092 } 2093 2094 // This will fetch an account from a resolver if defined. 2095 // Lock is NOT held upon entry. 2096 func (s *Server) fetchAccount(name string) (*Account, error) { 2097 accClaims, claimJWT, err := s.fetchAccountClaims(name) 2098 if accClaims == nil { 2099 return nil, err 2100 } 2101 acc := s.buildInternalAccount(accClaims) 2102 acc.claimJWT = claimJWT 2103 // Due to possible race, if registerAccount() returns a non 2104 // nil account, it means the same account was already 2105 // registered and we should use this one. 2106 if racc := s.registerAccount(acc); racc != nil { 2107 // Update with the new claims in case they are new. 2108 if err = s.updateAccountWithClaimJWT(racc, claimJWT); err != nil { 2109 return nil, err 2110 } 2111 return racc, nil 2112 } 2113 // The sub imports may have been setup but will not have had their 2114 // subscriptions properly setup. Do that here. 
2115 var needImportSubs bool 2116 2117 acc.mu.Lock() 2118 if len(acc.imports.services) > 0 { 2119 if acc.ic == nil { 2120 acc.ic = s.createInternalAccountClient() 2121 acc.ic.acc = acc 2122 } 2123 needImportSubs = true 2124 } 2125 acc.mu.Unlock() 2126 2127 // Do these outside the lock. 2128 if needImportSubs { 2129 acc.addAllServiceImportSubs() 2130 } 2131 2132 return acc, nil 2133 } 2134 2135 // Start up the server, this will not block. 2136 // 2137 // WaitForShutdown can be used to block and wait for the server to shutdown properly if needed 2138 // after calling s.Shutdown() 2139 func (s *Server) Start() { 2140 s.Noticef("Starting nats-server") 2141 2142 gc := gitCommit 2143 if gc == _EMPTY_ { 2144 gc = "not set" 2145 } 2146 2147 // Snapshot server options. 2148 opts := s.getOpts() 2149 clusterName := s.ClusterName() 2150 2151 s.Noticef(" Version: %s", VERSION) 2152 s.Noticef(" Git: [%s]", gc) 2153 s.Debugf(" Go build: %s", s.info.GoVersion) 2154 if clusterName != _EMPTY_ { 2155 s.Noticef(" Cluster: %s", clusterName) 2156 } 2157 s.Noticef(" Name: %s", s.info.Name) 2158 if opts.JetStream { 2159 s.Noticef(" Node: %s", getHash(s.info.Name)) 2160 } 2161 s.Noticef(" ID: %s", s.info.ID) 2162 2163 defer s.Noticef("Server is ready") 2164 2165 // Check for insecure configurations. 2166 s.checkAuthforWarnings() 2167 2168 // Avoid RACE between Start() and Shutdown() 2169 s.running.Store(true) 2170 s.mu.Lock() 2171 // Update leafNodeEnabled in case options have changed post NewServer() 2172 // and before Start() (we should not be able to allow that, but server has 2173 // direct reference to user-provided options - at least before a Reload() is 2174 // performed. 2175 s.leafNodeEnabled = opts.LeafNode.Port != 0 || len(opts.LeafNode.Remotes) > 0 2176 s.mu.Unlock() 2177 2178 s.grMu.Lock() 2179 s.grRunning = true 2180 s.grMu.Unlock() 2181 2182 s.startRateLimitLogExpiration() 2183 2184 // Pprof http endpoint for the profiler. 
2185 if opts.ProfPort != 0 { 2186 s.StartProfiler() 2187 } else { 2188 // It's still possible to access this profile via a SYS endpoint, so set 2189 // this anyway. (Otherwise StartProfiler would have called it.) 2190 s.setBlockProfileRate(opts.ProfBlockRate) 2191 } 2192 2193 if opts.ConfigFile != _EMPTY_ { 2194 s.Noticef("Using configuration file: %s", opts.ConfigFile) 2195 } 2196 2197 hasOperators := len(opts.TrustedOperators) > 0 2198 if hasOperators { 2199 s.Noticef("Trusted Operators") 2200 } 2201 for _, opc := range opts.TrustedOperators { 2202 s.Noticef(" System : %q", opc.Audience) 2203 s.Noticef(" Operator: %q", opc.Name) 2204 s.Noticef(" Issued : %v", time.Unix(opc.IssuedAt, 0)) 2205 switch opc.Expires { 2206 case 0: 2207 s.Noticef(" Expires : Never") 2208 default: 2209 s.Noticef(" Expires : %v", time.Unix(opc.Expires, 0)) 2210 } 2211 } 2212 if hasOperators && opts.SystemAccount == _EMPTY_ { 2213 s.Warnf("Trusted Operators should utilize a System Account") 2214 } 2215 if opts.MaxPayload > MAX_PAYLOAD_MAX_SIZE { 2216 s.Warnf("Maximum payloads over %v are generally discouraged and could lead to poor performance", 2217 friendlyBytes(int64(MAX_PAYLOAD_MAX_SIZE))) 2218 } 2219 2220 if len(opts.JsAccDefaultDomain) > 0 { 2221 s.Warnf("The option `default_js_domain` is a temporary backwards compatibility measure and will be removed") 2222 } 2223 2224 // If we have a memory resolver, check the accounts here for validation exceptions. 2225 // This allows them to be logged right away vs when they are accessed via a client. 2226 if hasOperators && len(opts.resolverPreloads) > 0 { 2227 s.checkResolvePreloads() 2228 } 2229 2230 // Log the pid to a file. 2231 if opts.PidFile != _EMPTY_ { 2232 if err := s.logPid(); err != nil { 2233 s.Fatalf("Could not write pidfile: %v", err) 2234 return 2235 } 2236 } 2237 2238 // Setup system account which will start the eventing stack. 
2239 if sa := opts.SystemAccount; sa != _EMPTY_ { 2240 if err := s.SetSystemAccount(sa); err != nil { 2241 s.Fatalf("Can't set system account: %v", err) 2242 return 2243 } 2244 } else if !opts.NoSystemAccount { 2245 // We will create a default system account here. 2246 s.SetDefaultSystemAccount() 2247 } 2248 2249 // Start monitoring before enabling other subsystems of the 2250 // server to be able to monitor during startup. 2251 if err := s.StartMonitoring(); err != nil { 2252 s.Fatalf("Can't start monitoring: %v", err) 2253 return 2254 } 2255 2256 // Start up resolver machinery. 2257 if ar := s.AccountResolver(); ar != nil { 2258 if err := ar.Start(s); err != nil { 2259 s.Fatalf("Could not start resolver: %v", err) 2260 return 2261 } 2262 // In operator mode, when the account resolver depends on an external system and 2263 // the system account is the bootstrapping account, start fetching it. 2264 if len(opts.TrustedOperators) == 1 && opts.SystemAccount != _EMPTY_ && opts.SystemAccount != DEFAULT_SYSTEM_ACCOUNT { 2265 opts := s.getOpts() 2266 _, isMemResolver := ar.(*MemAccResolver) 2267 if v, ok := s.accounts.Load(opts.SystemAccount); !isMemResolver && ok && v.(*Account).claimJWT == _EMPTY_ { 2268 s.Noticef("Using bootstrapping system account") 2269 s.startGoRoutine(func() { 2270 defer s.grWG.Done() 2271 t := time.NewTicker(time.Second) 2272 defer t.Stop() 2273 for { 2274 select { 2275 case <-s.quitCh: 2276 return 2277 case <-t.C: 2278 sacc := s.SystemAccount() 2279 if claimJWT, err := fetchAccount(ar, opts.SystemAccount); err != nil { 2280 continue 2281 } else if err = s.updateAccountWithClaimJWT(sacc, claimJWT); err != nil { 2282 continue 2283 } 2284 s.Noticef("System account fetched and updated") 2285 return 2286 } 2287 } 2288 }) 2289 } 2290 } 2291 } 2292 2293 // Start expiration of mapped GW replies, regardless if 2294 // this server is configured with gateway or not. 2295 s.startGWReplyMapExpiration() 2296 2297 // Check if JetStream has been enabled. 
This needs to be after 2298 // the system account setup above. JetStream will create its 2299 // own system account if one is not present. 2300 if opts.JetStream { 2301 // Make sure someone is not trying to enable on the system account. 2302 if sa := s.SystemAccount(); sa != nil && len(sa.jsLimits) > 0 { 2303 s.Fatalf("Not allowed to enable JetStream on the system account") 2304 } 2305 cfg := &JetStreamConfig{ 2306 StoreDir: opts.StoreDir, 2307 SyncInterval: opts.SyncInterval, 2308 SyncAlways: opts.SyncAlways, 2309 MaxMemory: opts.JetStreamMaxMemory, 2310 MaxStore: opts.JetStreamMaxStore, 2311 Domain: opts.JetStreamDomain, 2312 CompressOK: true, 2313 UniqueTag: opts.JetStreamUniqueTag, 2314 } 2315 if err := s.EnableJetStream(cfg); err != nil { 2316 s.Fatalf("Can't start JetStream: %v", err) 2317 return 2318 } 2319 } else { 2320 // Check to see if any configured accounts have JetStream enabled. 2321 sa, ga := s.SystemAccount(), s.GlobalAccount() 2322 var hasSys, hasGlobal bool 2323 var total int 2324 2325 s.accounts.Range(func(k, v any) bool { 2326 total++ 2327 acc := v.(*Account) 2328 if acc == sa { 2329 hasSys = true 2330 } else if acc == ga { 2331 hasGlobal = true 2332 } 2333 acc.mu.RLock() 2334 hasJs := len(acc.jsLimits) > 0 2335 acc.mu.RUnlock() 2336 if hasJs { 2337 s.checkJetStreamExports() 2338 acc.enableAllJetStreamServiceImportsAndMappings() 2339 } 2340 return true 2341 }) 2342 // If we only have the system account and the global account and we are not standalone, 2343 // go ahead and enable JS on $G in case we are in simple mixed mode setup. 2344 if total == 2 && hasSys && hasGlobal && !s.standAloneMode() { 2345 ga.mu.Lock() 2346 ga.jsLimits = map[string]JetStreamAccountLimits{ 2347 _EMPTY_: dynamicJSAccountLimits, 2348 } 2349 ga.mu.Unlock() 2350 s.checkJetStreamExports() 2351 ga.enableAllJetStreamServiceImportsAndMappings() 2352 } 2353 } 2354 2355 // Start OCSP Stapling monitoring for TLS certificates if enabled. 
Hook TLS handshake for 2356 // OCSP check on peers (LEAF and CLIENT kind) if enabled. 2357 s.startOCSPMonitoring() 2358 2359 // Configure OCSP Response Cache for peer OCSP checks if enabled. 2360 s.initOCSPResponseCache() 2361 2362 // Start up gateway if needed. Do this before starting the routes, because 2363 // we want to resolve the gateway host:port so that this information can 2364 // be sent to other routes. 2365 if opts.Gateway.Port != 0 { 2366 s.startGateways() 2367 } 2368 2369 // Start websocket server if needed. Do this before starting the routes, and 2370 // leaf node because we want to resolve the gateway host:port so that this 2371 // information can be sent to other routes. 2372 if opts.Websocket.Port != 0 { 2373 s.startWebsocketServer() 2374 } 2375 2376 // Start up listen if we want to accept leaf node connections. 2377 if opts.LeafNode.Port != 0 { 2378 // Will resolve or assign the advertise address for the leafnode listener. 2379 // We need that in StartRouting(). 2380 s.startLeafNodeAcceptLoop() 2381 } 2382 2383 // Solicit remote servers for leaf node connections. 2384 if len(opts.LeafNode.Remotes) > 0 { 2385 s.solicitLeafNodeRemotes(opts.LeafNode.Remotes) 2386 } 2387 2388 // TODO (ik): I wanted to refactor this by starting the client 2389 // accept loop first, that is, it would resolve listen spec 2390 // in place, but start the accept-for-loop in a different go 2391 // routine. This would get rid of the synchronization between 2392 // this function and StartRouting, which I also would have wanted 2393 // to refactor, but both AcceptLoop() and StartRouting() have 2394 // been exported and not sure if that would break users using them. 2395 // We could mark them as deprecated and remove in a release or two... 2396 2397 // The Routing routine needs to wait for the client listen 2398 // port to be opened and potential ephemeral port selected. 
2399 clientListenReady := make(chan struct{}) 2400 2401 // MQTT 2402 if opts.MQTT.Port != 0 { 2403 s.startMQTT() 2404 } 2405 2406 // Start up routing as well if needed. 2407 if opts.Cluster.Port != 0 { 2408 s.startGoRoutine(func() { 2409 s.StartRouting(clientListenReady) 2410 }) 2411 } 2412 2413 if opts.PortsFileDir != _EMPTY_ { 2414 s.logPorts() 2415 } 2416 2417 if opts.TLSRateLimit > 0 { 2418 s.startGoRoutine(s.logRejectedTLSConns) 2419 } 2420 2421 // We've finished starting up. 2422 close(s.startupComplete) 2423 2424 // Wait for clients. 2425 if !opts.DontListen { 2426 s.AcceptLoop(clientListenReady) 2427 } 2428 2429 // Bring OSCP Response cache online after accept loop started in anticipation of NATS-enabled cache types 2430 s.startOCSPResponseCache() 2431 } 2432 2433 func (s *Server) isShuttingDown() bool { 2434 return s.shutdown.Load() 2435 } 2436 2437 // Shutdown will shutdown the server instance by kicking out the AcceptLoop 2438 // and closing all associated clients. 2439 func (s *Server) Shutdown() { 2440 if s == nil { 2441 return 2442 } 2443 // This is for JetStream R1 Pull Consumers to allow signaling 2444 // that pending pull requests are invalid. 2445 s.signalPullConsumers() 2446 2447 // Transfer off any raft nodes that we are a leader by stepping them down. 2448 s.stepdownRaftNodes() 2449 2450 // Shutdown the eventing system as needed. 2451 // This is done first to send out any messages for 2452 // account status. We will also clean up any 2453 // eventing items associated with accounts. 2454 s.shutdownEventing() 2455 2456 // Prevent issues with multiple calls. 
2457 if s.isShuttingDown() { 2458 return 2459 } 2460 2461 s.mu.Lock() 2462 s.Noticef("Initiating Shutdown...") 2463 2464 accRes := s.accResolver 2465 2466 opts := s.getOpts() 2467 2468 s.shutdown.Store(true) 2469 s.running.Store(false) 2470 s.grMu.Lock() 2471 s.grRunning = false 2472 s.grMu.Unlock() 2473 s.mu.Unlock() 2474 2475 if accRes != nil { 2476 accRes.Close() 2477 } 2478 2479 // Now check and shutdown jetstream. 2480 s.shutdownJetStream() 2481 2482 // Now shutdown the nodes 2483 s.shutdownRaftNodes() 2484 2485 s.mu.Lock() 2486 conns := make(map[uint64]*client) 2487 2488 // Copy off the clients 2489 for i, c := range s.clients { 2490 conns[i] = c 2491 } 2492 // Copy off the connections that are not yet registered 2493 // in s.routes, but for which the readLoop has started 2494 s.grMu.Lock() 2495 for i, c := range s.grTmpClients { 2496 conns[i] = c 2497 } 2498 s.grMu.Unlock() 2499 // Copy off the routes 2500 s.forEachRoute(func(r *client) { 2501 r.mu.Lock() 2502 conns[r.cid] = r 2503 r.mu.Unlock() 2504 }) 2505 // Copy off the gateways 2506 s.getAllGatewayConnections(conns) 2507 2508 // Copy off the leaf nodes 2509 for i, c := range s.leafs { 2510 conns[i] = c 2511 } 2512 2513 // Number of done channel responses we expect. 
2514 doneExpected := 0 2515 2516 // Kick client AcceptLoop() 2517 if s.listener != nil { 2518 doneExpected++ 2519 s.listener.Close() 2520 s.listener = nil 2521 } 2522 2523 // Kick websocket server 2524 if s.websocket.server != nil { 2525 doneExpected++ 2526 s.websocket.server.Close() 2527 s.websocket.server = nil 2528 s.websocket.listener = nil 2529 } 2530 2531 // Kick MQTT accept loop 2532 if s.mqtt.listener != nil { 2533 doneExpected++ 2534 s.mqtt.listener.Close() 2535 s.mqtt.listener = nil 2536 } 2537 2538 // Kick leafnodes AcceptLoop() 2539 if s.leafNodeListener != nil { 2540 doneExpected++ 2541 s.leafNodeListener.Close() 2542 s.leafNodeListener = nil 2543 } 2544 2545 // Kick route AcceptLoop() 2546 if s.routeListener != nil { 2547 doneExpected++ 2548 s.routeListener.Close() 2549 s.routeListener = nil 2550 } 2551 2552 // Kick Gateway AcceptLoop() 2553 if s.gatewayListener != nil { 2554 doneExpected++ 2555 s.gatewayListener.Close() 2556 s.gatewayListener = nil 2557 } 2558 2559 // Kick HTTP monitoring if its running 2560 if s.http != nil { 2561 doneExpected++ 2562 s.http.Close() 2563 s.http = nil 2564 } 2565 2566 // Kick Profiling if its running 2567 if s.profiler != nil { 2568 doneExpected++ 2569 s.profiler.Close() 2570 } 2571 2572 s.mu.Unlock() 2573 2574 // Release go routines that wait on that channel 2575 close(s.quitCh) 2576 2577 // Close client and route connections 2578 for _, c := range conns { 2579 c.setNoReconnect() 2580 c.closeConnection(ServerShutdown) 2581 } 2582 2583 // Block until the accept loops exit 2584 for doneExpected > 0 { 2585 <-s.done 2586 doneExpected-- 2587 } 2588 2589 // Wait for go routines to be done. 2590 s.grWG.Wait() 2591 2592 if opts.PortsFileDir != _EMPTY_ { 2593 s.deletePortsFile(opts.PortsFileDir) 2594 } 2595 2596 s.Noticef("Server Exiting..") 2597 2598 // Stop OCSP Response Cache 2599 if s.ocsprc != nil { 2600 s.ocsprc.Stop(s) 2601 } 2602 2603 // Close logger if applicable. 
It allows tests on Windows 2604 // to be able to do proper cleanup (delete log file). 2605 s.logging.RLock() 2606 log := s.logging.logger 2607 s.logging.RUnlock() 2608 if log != nil { 2609 if l, ok := log.(*logger.Logger); ok { 2610 l.Close() 2611 } 2612 } 2613 // Notify that the shutdown is complete 2614 close(s.shutdownComplete) 2615 } 2616 2617 // WaitForShutdown will block until the server has been fully shutdown. 2618 func (s *Server) WaitForShutdown() { 2619 <-s.shutdownComplete 2620 } 2621 2622 // AcceptLoop is exported for easier testing. 2623 func (s *Server) AcceptLoop(clr chan struct{}) { 2624 // If we were to exit before the listener is setup properly, 2625 // make sure we close the channel. 2626 defer func() { 2627 if clr != nil { 2628 close(clr) 2629 } 2630 }() 2631 2632 if s.isShuttingDown() { 2633 return 2634 } 2635 2636 // Snapshot server options. 2637 opts := s.getOpts() 2638 2639 // Setup state that can enable shutdown 2640 s.mu.Lock() 2641 hp := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port)) 2642 l, e := natsListen("tcp", hp) 2643 s.listenerErr = e 2644 if e != nil { 2645 s.mu.Unlock() 2646 s.Fatalf("Error listening on port: %s, %q", hp, e) 2647 return 2648 } 2649 s.Noticef("Listening for client connections on %s", 2650 net.JoinHostPort(opts.Host, strconv.Itoa(l.Addr().(*net.TCPAddr).Port))) 2651 2652 // Alert of TLS enabled. 2653 if opts.TLSConfig != nil { 2654 s.Noticef("TLS required for client connections") 2655 if opts.TLSHandshakeFirst && opts.TLSHandshakeFirstFallback == 0 { 2656 s.Warnf("Clients that are not using \"TLS Handshake First\" option will fail to connect") 2657 } 2658 } 2659 2660 // If server was started with RANDOM_PORT (-1), opts.Port would be equal 2661 // to 0 at the beginning this function. So we need to get the actual port 2662 if opts.Port == 0 { 2663 // Write resolved port back to options. 
2664 opts.Port = l.Addr().(*net.TCPAddr).Port 2665 } 2666 2667 // Now that port has been set (if it was set to RANDOM), set the 2668 // server's info Host/Port with either values from Options or 2669 // ClientAdvertise. 2670 if err := s.setInfoHostPort(); err != nil { 2671 s.Fatalf("Error setting server INFO with ClientAdvertise value of %s, err=%v", opts.ClientAdvertise, err) 2672 l.Close() 2673 s.mu.Unlock() 2674 return 2675 } 2676 // Keep track of client connect URLs. We may need them later. 2677 s.clientConnectURLs = s.getClientConnectURLs() 2678 s.listener = l 2679 2680 go s.acceptConnections(l, "Client", func(conn net.Conn) { s.createClient(conn) }, 2681 func(_ error) bool { 2682 if s.isLameDuckMode() { 2683 // Signal that we are not accepting new clients 2684 s.ldmCh <- true 2685 // Now wait for the Shutdown... 2686 <-s.quitCh 2687 return true 2688 } 2689 return false 2690 }) 2691 s.mu.Unlock() 2692 2693 // Let the caller know that we are ready 2694 close(clr) 2695 clr = nil 2696 } 2697 2698 // InProcessConn returns an in-process connection to the server, 2699 // avoiding the need to use a TCP listener for local connectivity 2700 // within the same process. This can be used regardless of the 2701 // state of the DontListen option. 
2702 func (s *Server) InProcessConn() (net.Conn, error) { 2703 pl, pr := net.Pipe() 2704 if !s.startGoRoutine(func() { 2705 s.createClientInProcess(pl) 2706 s.grWG.Done() 2707 }) { 2708 pl.Close() 2709 pr.Close() 2710 return nil, fmt.Errorf("failed to create connection") 2711 } 2712 return pr, nil 2713 } 2714 2715 func (s *Server) acceptConnections(l net.Listener, acceptName string, createFunc func(conn net.Conn), errFunc func(err error) bool) { 2716 tmpDelay := ACCEPT_MIN_SLEEP 2717 2718 for { 2719 conn, err := l.Accept() 2720 if err != nil { 2721 if errFunc != nil && errFunc(err) { 2722 return 2723 } 2724 if tmpDelay = s.acceptError(acceptName, err, tmpDelay); tmpDelay < 0 { 2725 break 2726 } 2727 continue 2728 } 2729 tmpDelay = ACCEPT_MIN_SLEEP 2730 if !s.startGoRoutine(func() { 2731 s.reloadMu.RLock() 2732 createFunc(conn) 2733 s.reloadMu.RUnlock() 2734 s.grWG.Done() 2735 }) { 2736 conn.Close() 2737 } 2738 } 2739 s.Debugf(acceptName + " accept loop exiting..") 2740 s.done <- true 2741 } 2742 2743 // This function sets the server's info Host/Port based on server Options. 2744 // Note that this function may be called during config reload, this is why 2745 // Host/Port may be reset to original Options if the ClientAdvertise option 2746 // is not set (since it may have previously been). 2747 func (s *Server) setInfoHostPort() error { 2748 // When this function is called, opts.Port is set to the actual listen 2749 // port (if option was originally set to RANDOM), even during a config 2750 // reload. So use of s.opts.Port is safe. 2751 opts := s.getOpts() 2752 if opts.ClientAdvertise != _EMPTY_ { 2753 h, p, err := parseHostPort(opts.ClientAdvertise, opts.Port) 2754 if err != nil { 2755 return err 2756 } 2757 s.info.Host = h 2758 s.info.Port = p 2759 } else { 2760 s.info.Host = opts.Host 2761 s.info.Port = opts.Port 2762 } 2763 return nil 2764 } 2765 2766 // StartProfiler is called to enable dynamic profiling. 
2767 func (s *Server) StartProfiler() { 2768 if s.isShuttingDown() { 2769 return 2770 } 2771 2772 // Snapshot server options. 2773 opts := s.getOpts() 2774 2775 port := opts.ProfPort 2776 2777 // Check for Random Port 2778 if port == -1 { 2779 port = 0 2780 } 2781 2782 s.mu.Lock() 2783 hp := net.JoinHostPort(opts.Host, strconv.Itoa(port)) 2784 l, err := net.Listen("tcp", hp) 2785 2786 if err != nil { 2787 s.mu.Unlock() 2788 s.Fatalf("error starting profiler: %s", err) 2789 return 2790 } 2791 s.Noticef("profiling port: %d", l.Addr().(*net.TCPAddr).Port) 2792 2793 srv := &http.Server{ 2794 Addr: hp, 2795 Handler: http.DefaultServeMux, 2796 MaxHeaderBytes: 1 << 20, 2797 } 2798 s.profiler = l 2799 s.profilingServer = srv 2800 2801 s.setBlockProfileRate(opts.ProfBlockRate) 2802 2803 go func() { 2804 // if this errors out, it's probably because the server is being shutdown 2805 err := srv.Serve(l) 2806 if err != nil { 2807 if !s.isShuttingDown() { 2808 s.Fatalf("error starting profiler: %s", err) 2809 } 2810 } 2811 srv.Close() 2812 s.done <- true 2813 }() 2814 s.mu.Unlock() 2815 } 2816 2817 func (s *Server) setBlockProfileRate(rate int) { 2818 // Passing i ProfBlockRate <= 0 here will disable or > 0 will enable. 2819 runtime.SetBlockProfileRate(rate) 2820 2821 if rate > 0 { 2822 s.Warnf("Block profiling is enabled (rate %d), this may have a performance impact", rate) 2823 } 2824 } 2825 2826 // StartHTTPMonitoring will enable the HTTP monitoring port. 2827 // DEPRECATED: Should use StartMonitoring. 2828 func (s *Server) StartHTTPMonitoring() { 2829 s.startMonitoring(false) 2830 } 2831 2832 // StartHTTPSMonitoring will enable the HTTPS monitoring port. 2833 // DEPRECATED: Should use StartMonitoring. 2834 func (s *Server) StartHTTPSMonitoring() { 2835 s.startMonitoring(true) 2836 } 2837 2838 // StartMonitoring starts the HTTP or HTTPs server if needed. 2839 func (s *Server) StartMonitoring() error { 2840 // Snapshot server options. 
2841 opts := s.getOpts() 2842 2843 // Specifying both HTTP and HTTPS ports is a misconfiguration 2844 if opts.HTTPPort != 0 && opts.HTTPSPort != 0 { 2845 return fmt.Errorf("can't specify both HTTP (%v) and HTTPs (%v) ports", opts.HTTPPort, opts.HTTPSPort) 2846 } 2847 var err error 2848 if opts.HTTPPort != 0 { 2849 err = s.startMonitoring(false) 2850 } else if opts.HTTPSPort != 0 { 2851 if opts.TLSConfig == nil { 2852 return fmt.Errorf("TLS cert and key required for HTTPS") 2853 } 2854 err = s.startMonitoring(true) 2855 } 2856 return err 2857 } 2858 2859 // HTTP endpoints 2860 const ( 2861 RootPath = "/" 2862 VarzPath = "/varz" 2863 ConnzPath = "/connz" 2864 RoutezPath = "/routez" 2865 GatewayzPath = "/gatewayz" 2866 LeafzPath = "/leafz" 2867 SubszPath = "/subsz" 2868 StackszPath = "/stacksz" 2869 AccountzPath = "/accountz" 2870 AccountStatzPath = "/accstatz" 2871 JszPath = "/jsz" 2872 HealthzPath = "/healthz" 2873 IPQueuesPath = "/ipqueuesz" 2874 ) 2875 2876 func (s *Server) basePath(p string) string { 2877 return path.Join(s.httpBasePath, p) 2878 } 2879 2880 type captureHTTPServerLog struct { 2881 s *Server 2882 prefix string 2883 } 2884 2885 func (cl *captureHTTPServerLog) Write(p []byte) (int, error) { 2886 var buf [128]byte 2887 var b = buf[:0] 2888 2889 b = append(b, []byte(cl.prefix)...) 2890 offset := 0 2891 if bytes.HasPrefix(p, []byte("http:")) { 2892 offset = 6 2893 } 2894 b = append(b, p[offset:]...) 2895 cl.s.Errorf(string(b)) 2896 return len(p), nil 2897 } 2898 2899 // The TLS configuration is passed to the listener when the monitoring 2900 // "server" is setup. That prevents TLS configuration updates on reload 2901 // from being used. By setting this function in tls.Config.GetConfigForClient 2902 // we instruct the TLS handshake to ask for the tls configuration to be 2903 // used for a specific client. We don't care which client, we always use 2904 // the same TLS configuration. 
2905 func (s *Server) getMonitoringTLSConfig(_ *tls.ClientHelloInfo) (*tls.Config, error) { 2906 opts := s.getOpts() 2907 tc := opts.TLSConfig.Clone() 2908 tc.ClientAuth = tls.NoClientCert 2909 return tc, nil 2910 } 2911 2912 // Start the monitoring server 2913 func (s *Server) startMonitoring(secure bool) error { 2914 if s.isShuttingDown() { 2915 return nil 2916 } 2917 2918 // Snapshot server options. 2919 opts := s.getOpts() 2920 2921 var ( 2922 hp string 2923 err error 2924 httpListener net.Listener 2925 port int 2926 ) 2927 2928 monitorProtocol := "http" 2929 2930 if secure { 2931 monitorProtocol += "s" 2932 port = opts.HTTPSPort 2933 if port == -1 { 2934 port = 0 2935 } 2936 hp = net.JoinHostPort(opts.HTTPHost, strconv.Itoa(port)) 2937 config := opts.TLSConfig.Clone() 2938 config.GetConfigForClient = s.getMonitoringTLSConfig 2939 config.ClientAuth = tls.NoClientCert 2940 httpListener, err = tls.Listen("tcp", hp, config) 2941 2942 } else { 2943 port = opts.HTTPPort 2944 if port == -1 { 2945 port = 0 2946 } 2947 hp = net.JoinHostPort(opts.HTTPHost, strconv.Itoa(port)) 2948 httpListener, err = net.Listen("tcp", hp) 2949 } 2950 2951 if err != nil { 2952 return fmt.Errorf("can't listen to the monitor port: %v", err) 2953 } 2954 2955 rport := httpListener.Addr().(*net.TCPAddr).Port 2956 s.Noticef("Starting %s monitor on %s", monitorProtocol, net.JoinHostPort(opts.HTTPHost, strconv.Itoa(rport))) 2957 2958 mux := http.NewServeMux() 2959 2960 // Root 2961 mux.HandleFunc(s.basePath(RootPath), s.HandleRoot) 2962 // Varz 2963 mux.HandleFunc(s.basePath(VarzPath), s.HandleVarz) 2964 // Connz 2965 mux.HandleFunc(s.basePath(ConnzPath), s.HandleConnz) 2966 // Routez 2967 mux.HandleFunc(s.basePath(RoutezPath), s.HandleRoutez) 2968 // Gatewayz 2969 mux.HandleFunc(s.basePath(GatewayzPath), s.HandleGatewayz) 2970 // Leafz 2971 mux.HandleFunc(s.basePath(LeafzPath), s.HandleLeafz) 2972 // Subz 2973 mux.HandleFunc(s.basePath(SubszPath), s.HandleSubsz) 2974 // Subz alias for backwards 
compatibility 2975 mux.HandleFunc(s.basePath("/subscriptionsz"), s.HandleSubsz) 2976 // Stacksz 2977 mux.HandleFunc(s.basePath(StackszPath), s.HandleStacksz) 2978 // Accountz 2979 mux.HandleFunc(s.basePath(AccountzPath), s.HandleAccountz) 2980 // Accstatz 2981 mux.HandleFunc(s.basePath(AccountStatzPath), s.HandleAccountStatz) 2982 // Jsz 2983 mux.HandleFunc(s.basePath(JszPath), s.HandleJsz) 2984 // Healthz 2985 mux.HandleFunc(s.basePath(HealthzPath), s.HandleHealthz) 2986 // IPQueuesz 2987 mux.HandleFunc(s.basePath(IPQueuesPath), s.HandleIPQueuesz) 2988 2989 // Do not set a WriteTimeout because it could cause cURL/browser 2990 // to return empty response or unable to display page if the 2991 // server needs more time to build the response. 2992 srv := &http.Server{ 2993 Addr: hp, 2994 Handler: mux, 2995 MaxHeaderBytes: 1 << 20, 2996 ErrorLog: log.New(&captureHTTPServerLog{s, "monitoring: "}, _EMPTY_, 0), 2997 } 2998 s.mu.Lock() 2999 s.http = httpListener 3000 s.httpHandler = mux 3001 s.monitoringServer = srv 3002 s.mu.Unlock() 3003 3004 go func() { 3005 if err := srv.Serve(httpListener); err != nil { 3006 if !s.isShuttingDown() { 3007 s.Fatalf("Error starting monitor on %q: %v", hp, err) 3008 } 3009 } 3010 srv.Close() 3011 s.mu.Lock() 3012 s.httpHandler = nil 3013 s.mu.Unlock() 3014 s.done <- true 3015 }() 3016 3017 return nil 3018 } 3019 3020 // HTTPHandler returns the http.Handler object used to handle monitoring 3021 // endpoints. It will return nil if the server is not configured for 3022 // monitoring, or if the server has not been started yet (Server.Start()). 3023 func (s *Server) HTTPHandler() http.Handler { 3024 s.mu.Lock() 3025 defer s.mu.Unlock() 3026 return s.httpHandler 3027 } 3028 3029 // Perform a conditional deep copy due to reference nature of [Client|WS]ConnectURLs. 3030 // If updates are made to Info, this function should be consulted and updated. 3031 // Assume lock is held. 
func (s *Server) copyInfo() Info {
	// Struct copy first; the two URL slices still share their backing arrays
	// with s.info at this point and are detached below when non-empty.
	info := s.info
	if len(info.ClientConnectURLs) > 0 {
		info.ClientConnectURLs = append([]string(nil), s.info.ClientConnectURLs...)
	}
	if len(info.WSConnectURLs) > 0 {
		info.WSConnectURLs = append([]string(nil), s.info.WSConnectURLs...)
	}
	return info
}

// tlsMixConn is used when we can receive both TLS and non-TLS connections on same port.
// It wraps a net.Conn together with bytes that were already read from it
// (pre), so that those bytes can be replayed to the next reader.
type tlsMixConn struct {
	net.Conn
	pre *bytes.Buffer
}

// Read for our mixed multi-reader.
// While the prebuffer exists, reads are served from it; once it has been
// fully drained it is dropped and later reads go to the embedded Conn.
func (c *tlsMixConn) Read(b []byte) (int, error) {
	if c.pre != nil {
		n, err := c.pre.Read(b)
		if c.pre.Len() == 0 {
			c.pre = nil
		}
		return n, err
	}
	return c.Conn.Read(b)
}

// createClient creates a client for conn, treating it as a regular
// (not in-process) connection. See createClientEx.
func (s *Server) createClient(conn net.Conn) *client {
	return s.createClientEx(conn, false)
}

// createClientInProcess creates a client for conn, flagging it as an
// in-process connection. See createClientEx.
func (s *Server) createClientInProcess(conn net.Conn) *client {
	return s.createClientEx(conn, true)
}

// createClientEx builds a client for conn, sends it the INFO protocol,
// registers it with the server, performs the server-side TLS handshake when
// one is required and finally starts the client's read and write loops.
//
// The INFO protocol is sent before the TLS handshake, unless "TLS first" is
// in effect, in which case it is sent after the handshake (or before, if the
// fallback delay expires without the client sending anything).
//
// It returns nil when the connection was rejected or closed along the way
// (max connections exceeded, connection throttling, TLS handshake failure,
// connection closed while sending INFO). When the server is not running or is
// in lame duck mode, the client is returned without being registered and
// without its loops being started.
func (s *Server) createClientEx(conn net.Conn, inProcess bool) *client {
	// Snapshot server options.
	opts := s.getOpts()

	maxPay := int32(opts.MaxPayload)
	maxSubs := int32(opts.MaxSubs)
	// For system, maxSubs of 0 means unlimited, so re-adjust here.
	if maxSubs == 0 {
		maxSubs = -1
	}
	now := time.Now()

	c := &client{srv: s, nc: conn, opts: defaultOpts, mpay: maxPay, msubs: maxSubs, start: now, last: now}

	c.registerWithAccount(s.globalAccount())

	var info Info
	var authRequired bool

	s.mu.Lock()
	// Grab JSON info string
	info = s.copyInfo()
	if s.nonceRequired() {
		// Nonce handling
		var raw [nonceLen]byte
		nonce := raw[:]
		s.generateNonce(nonce)
		info.Nonce = string(nonce)
	}
	c.nonce = []byte(info.Nonce)
	// Captured before info.AuthRequired is possibly reset just below; this
	// value drives the expectConnect flag and the auth timer further down.
	authRequired = info.AuthRequired

	// Check to see if we have auth_required set but we also have a no_auth_user.
	// If so set back to false.
	if info.AuthRequired && opts.NoAuthUser != _EMPTY_ && opts.NoAuthUser != s.sysAccOnlyNoAuthUser {
		info.AuthRequired = false
	}

	// Check to see if this is an in-process connection with tls_required.
	// If so, set as not required, but available.
	if inProcess && info.TLSRequired {
		info.TLSRequired = false
		info.TLSAvailable = true
	}

	s.totalClients++
	s.mu.Unlock()

	// Grab lock
	c.mu.Lock()
	if authRequired {
		c.flags.set(expectConnect)
	}

	// Initialize
	c.initClient()

	c.Debugf("Client connection created")

	// Save info.TLSRequired value since we may need to change it back and forth.
	orgInfoTLSReq := info.TLSRequired

	var tlsFirstFallback time.Duration
	// Check if we should do TLS first.
	tlsFirst := opts.TLSConfig != nil && opts.TLSHandshakeFirst
	if tlsFirst {
		// Make sure info.TLSRequired is set to true (it could be false
		// if AllowNonTLS is enabled).
		info.TLSRequired = true
		// Get the fallback delay value if applicable.
		if f := opts.TLSHandshakeFirstFallback; f > 0 {
			tlsFirstFallback = f
		} else if inProcess {
			// For in-process connection, we will always have a fallback
			// delay. It allows support for non-TLS, TLS and "TLS First"
			// in-process clients to successfully connect.
			tlsFirstFallback = DEFAULT_TLS_HANDSHAKE_FIRST_FALLBACK_DELAY
		}
	}

	// Decide if we are going to require TLS or not and generate INFO json.
	tlsRequired := info.TLSRequired
	infoBytes := c.generateClientInfoJSON(info)

	// Send our information, except if TLS and TLSHandshakeFirst is requested.
	if !tlsFirst {
		// Need to be sent in place since writeLoop cannot be started until
		// TLS handshake is done (if applicable).
		c.sendProtoNow(infoBytes)
	}

	// Unlock to register
	c.mu.Unlock()

	// Register with the server.
	s.mu.Lock()
	// If server is not running, Shutdown() may have already gathered the
	// list of connections to close. It won't contain this one, so we need
	// to bail out now otherwise the readLoop started down there would not
	// be interrupted. Skip also if in lame duck mode.
	if !s.isRunning() || s.ldm {
		// There are some tests that create a server but don't start it,
		// and use "async" clients and perform the parsing manually. Such
		// clients would branch here (since server is not running). However,
		// when a server was really running and has been shutdown, we must
		// close this connection.
		if s.isShuttingDown() {
			conn.Close()
		}
		s.mu.Unlock()
		return c
	}

	// If there is a max connections specified, check that adding
	// this new client would not push us over the max
	if opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn {
		s.mu.Unlock()
		c.maxConnExceeded()
		return nil
	}
	s.clients[c.cid] = c

	s.mu.Unlock()

	// Re-Grab lock
	c.mu.Lock()

	isClosed := c.isClosed()
	// pre holds the bytes peeked from the connection below. They are either
	// replayed into the TLS handshake through a tlsMixConn or handed to
	// readLoop as its initial input.
	var pre []byte
	// We need first to check for "TLS First" fallback delay.
	if !isClosed && tlsFirstFallback > 0 {
		// We wait and see if we are getting any data. Since we did not send
		// the INFO protocol yet, only clients that use TLS first should be
		// sending data (the TLS handshake). We don't really check the content:
		// if it is a rogue agent and not an actual client performing the
		// TLS handshake, the error will be detected when performing the
		// handshake on our side.
		pre = make([]byte, 4)
		c.nc.SetReadDeadline(time.Now().Add(tlsFirstFallback))
		n, _ := io.ReadFull(c.nc, pre[:])
		c.nc.SetReadDeadline(time.Time{})
		// If we get any data (regardless of possible timeout), we will proceed
		// with the TLS handshake.
		if n > 0 {
			pre = pre[:n]
		} else {
			// We did not get anything so we will send the INFO protocol.
			pre = nil

			// Restore the original info.TLSRequired value if it is
			// different that the current value and regenerate infoBytes.
			if orgInfoTLSReq != info.TLSRequired {
				info.TLSRequired = orgInfoTLSReq
				infoBytes = c.generateClientInfoJSON(info)
			}
			c.sendProtoNow(infoBytes)
			// Set the boolean to false for the rest of the function.
			tlsFirst = false
			// Check closed status again
			isClosed = c.isClosed()
		}
	}
	// If we have both TLS and non-TLS allowed we need to see which
	// one the client wants. We'll always allow this for in-process
	// connections.
	if !isClosed && !tlsFirst && opts.TLSConfig != nil && (inProcess || opts.AllowNonTLS) {
		pre = make([]byte, 4)
		c.nc.SetReadDeadline(time.Now().Add(secondsToDuration(opts.TLSTimeout)))
		n, _ := io.ReadFull(c.nc, pre[:])
		c.nc.SetReadDeadline(time.Time{})
		pre = pre[:n]
		// A first byte of 0x16 is taken as the start of a TLS handshake;
		// anything else (or no data at all) means the client is not using TLS.
		if n > 0 && pre[0] == 0x16 {
			tlsRequired = true
		} else {
			tlsRequired = false
		}
	}

	// Check for TLS
	if !isClosed && tlsRequired {
		if s.connRateCounter != nil && !s.connRateCounter.allow() {
			c.mu.Unlock()
			c.sendErr("Connection throttling is active. Please try again later.")
			c.closeConnection(MaxConnectionsExceeded)
			return nil
		}

		// If we have a prebuffer create a multi-reader.
		if len(pre) > 0 {
			c.nc = &tlsMixConn{c.nc, bytes.NewBuffer(pre)}
			// Clear pre so it is not parsed.
			pre = nil
		}
		// Performs server-side TLS handshake.
		if err := c.doTLSServerHandshake(_EMPTY_, opts.TLSConfig, opts.TLSTimeout, opts.TLSPinnedCerts); err != nil {
			c.mu.Unlock()
			return nil
		}
	}

	// Now, send the INFO if it was delayed
	if !isClosed && tlsFirst {
		c.flags.set(didTLSFirst)
		c.sendProtoNow(infoBytes)
		// Check closed status
		isClosed = c.isClosed()
	}

	// Connection could have been closed while sending the INFO proto.
	if isClosed {
		c.mu.Unlock()
		// We need to call closeConnection() to make sure that proper cleanup is done.
		c.closeConnection(WriteError)
		return nil
	}

	// Check for Auth. We schedule this timer after the TLS handshake to avoid
	// the race where the timer fires during the handshake and causes the
	// server to write bad data to the socket. See issue #432.
	if authRequired {
		c.setAuthTimer(secondsToDuration(opts.AuthTimeout))
	}

	// Do final client initialization

	// Set the Ping timer. Will be reset once connect was received.
	c.setPingTimer()

	// Spin up the read loop.
	s.startGoRoutine(func() { c.readLoop(pre) })

	// Spin up the write loop.
	s.startGoRoutine(func() { c.writeLoop() })

	if tlsRequired {
		c.Debugf("TLS handshake complete")
		cs := c.nc.(*tls.Conn).ConnectionState()
		c.Debugf("TLS version %s, cipher suite %s", tlsVersion(cs.Version), tlsCipher(cs.CipherSuite))
	}

	c.mu.Unlock()

	return c
}

// This will save off a closed client in a ring buffer such that
// /connz can inspect. Useful for debugging, etc.
3316 func (s *Server) saveClosedClient(c *client, nc net.Conn, reason ClosedState) { 3317 now := time.Now() 3318 3319 s.accountDisconnectEvent(c, now, reason.String()) 3320 3321 c.mu.Lock() 3322 3323 cc := &closedClient{} 3324 cc.fill(c, nc, now, false) 3325 cc.Stop = &now 3326 cc.Reason = reason.String() 3327 3328 // Do subs, do not place by default in main ConnInfo 3329 if len(c.subs) > 0 { 3330 cc.subs = make([]SubDetail, 0, len(c.subs)) 3331 for _, sub := range c.subs { 3332 cc.subs = append(cc.subs, newSubDetail(sub)) 3333 } 3334 // Now set this to nil to allow connection to be released. 3335 c.subs = nil 3336 } 3337 // Hold user as well. 3338 cc.user = c.getRawAuthUser() 3339 // Hold account name if not the global account. 3340 if c.acc != nil && c.acc.Name != globalAccountName { 3341 cc.acc = c.acc.Name 3342 } 3343 cc.JWT = c.opts.JWT 3344 cc.IssuerKey = issuerForClient(c) 3345 cc.Tags = c.tags 3346 cc.NameTag = c.nameTag 3347 c.mu.Unlock() 3348 3349 // Place in the ring buffer 3350 s.mu.Lock() 3351 if s.closed != nil { 3352 s.closed.append(cc) 3353 } 3354 s.mu.Unlock() 3355 } 3356 3357 // Adds to the list of client and websocket clients connect URLs. 3358 // If there was a change, an INFO protocol is sent to registered clients 3359 // that support async INFO protocols. 3360 // Server lock held on entry. 3361 func (s *Server) addConnectURLsAndSendINFOToClients(curls, wsurls []string) { 3362 s.updateServerINFOAndSendINFOToClients(curls, wsurls, true) 3363 } 3364 3365 // Removes from the list of client and websocket clients connect URLs. 3366 // If there was a change, an INFO protocol is sent to registered clients 3367 // that support async INFO protocols. 3368 // Server lock held on entry. 
3369 func (s *Server) removeConnectURLsAndSendINFOToClients(curls, wsurls []string) { 3370 s.updateServerINFOAndSendINFOToClients(curls, wsurls, false) 3371 } 3372 3373 // Updates the list of client and websocket clients connect URLs and if any change 3374 // sends an async INFO update to clients that support it. 3375 // Server lock held on entry. 3376 func (s *Server) updateServerINFOAndSendINFOToClients(curls, wsurls []string, add bool) { 3377 remove := !add 3378 // Will return true if we need alter the server's Info object. 3379 updateMap := func(urls []string, m refCountedUrlSet) bool { 3380 wasUpdated := false 3381 for _, url := range urls { 3382 if add && m.addUrl(url) { 3383 wasUpdated = true 3384 } else if remove && m.removeUrl(url) { 3385 wasUpdated = true 3386 } 3387 } 3388 return wasUpdated 3389 } 3390 cliUpdated := updateMap(curls, s.clientConnectURLsMap) 3391 wsUpdated := updateMap(wsurls, s.websocket.connectURLsMap) 3392 3393 updateInfo := func(infoURLs *[]string, urls []string, m refCountedUrlSet) { 3394 // Recreate the info's slice from the map 3395 *infoURLs = (*infoURLs)[:0] 3396 // Add this server client connect ULRs first... 3397 *infoURLs = append(*infoURLs, urls...) 3398 // Then the ones from the map 3399 for url := range m { 3400 *infoURLs = append(*infoURLs, url) 3401 } 3402 } 3403 if cliUpdated { 3404 updateInfo(&s.info.ClientConnectURLs, s.clientConnectURLs, s.clientConnectURLsMap) 3405 } 3406 if wsUpdated { 3407 updateInfo(&s.info.WSConnectURLs, s.websocket.connectURLs, s.websocket.connectURLsMap) 3408 } 3409 if cliUpdated || wsUpdated { 3410 // Update the time of this update 3411 s.lastCURLsUpdate = time.Now().UnixNano() 3412 // Send to all registered clients that support async INFO protocols. 3413 s.sendAsyncInfoToClients(cliUpdated, wsUpdated) 3414 } 3415 } 3416 3417 // Handle closing down a connection when the handshake has timedout. 
3418 func tlsTimeout(c *client, conn *tls.Conn) { 3419 c.mu.Lock() 3420 closed := c.isClosed() 3421 c.mu.Unlock() 3422 // Check if already closed 3423 if closed { 3424 return 3425 } 3426 cs := conn.ConnectionState() 3427 if !cs.HandshakeComplete { 3428 c.Errorf("TLS handshake timeout") 3429 c.sendErr("Secure Connection - TLS Required") 3430 c.closeConnection(TLSHandshakeError) 3431 } 3432 } 3433 3434 // Seems silly we have to write these 3435 func tlsVersion(ver uint16) string { 3436 switch ver { 3437 case tls.VersionTLS10: 3438 return "1.0" 3439 case tls.VersionTLS11: 3440 return "1.1" 3441 case tls.VersionTLS12: 3442 return "1.2" 3443 case tls.VersionTLS13: 3444 return "1.3" 3445 } 3446 return fmt.Sprintf("Unknown [0x%x]", ver) 3447 } 3448 3449 // We use hex here so we don't need multiple versions 3450 func tlsCipher(cs uint16) string { 3451 name, present := cipherMapByID[cs] 3452 if present { 3453 return name 3454 } 3455 return fmt.Sprintf("Unknown [0x%x]", cs) 3456 } 3457 3458 // Remove a client or route from our internal accounting. 
3459 func (s *Server) removeClient(c *client) { 3460 // kind is immutable, so can check without lock 3461 switch c.kind { 3462 case CLIENT: 3463 c.mu.Lock() 3464 cid := c.cid 3465 updateProtoInfoCount := false 3466 if c.kind == CLIENT && c.opts.Protocol >= ClientProtoInfo { 3467 updateProtoInfoCount = true 3468 } 3469 c.mu.Unlock() 3470 3471 s.mu.Lock() 3472 delete(s.clients, cid) 3473 if updateProtoInfoCount { 3474 s.cproto-- 3475 } 3476 s.mu.Unlock() 3477 case ROUTER: 3478 s.removeRoute(c) 3479 case GATEWAY: 3480 s.removeRemoteGatewayConnection(c) 3481 case LEAF: 3482 s.removeLeafNodeConnection(c) 3483 } 3484 } 3485 3486 func (s *Server) removeFromTempClients(cid uint64) { 3487 s.grMu.Lock() 3488 delete(s.grTmpClients, cid) 3489 s.grMu.Unlock() 3490 } 3491 3492 func (s *Server) addToTempClients(cid uint64, c *client) bool { 3493 added := false 3494 s.grMu.Lock() 3495 if s.grRunning { 3496 s.grTmpClients[cid] = c 3497 added = true 3498 } 3499 s.grMu.Unlock() 3500 return added 3501 } 3502 3503 ///////////////////////////////////////////////////////////////// 3504 // These are some helpers for accounting in functional tests. 3505 ///////////////////////////////////////////////////////////////// 3506 3507 // NumRoutes will report the number of registered routes. 3508 func (s *Server) NumRoutes() int { 3509 s.mu.RLock() 3510 defer s.mu.RUnlock() 3511 return s.numRoutes() 3512 } 3513 3514 // numRoutes will report the number of registered routes. 3515 // Server lock held on entry 3516 func (s *Server) numRoutes() int { 3517 var nr int 3518 s.forEachRoute(func(c *client) { 3519 nr++ 3520 }) 3521 return nr 3522 } 3523 3524 // NumRemotes will report number of registered remotes. 3525 func (s *Server) NumRemotes() int { 3526 s.mu.RLock() 3527 defer s.mu.RUnlock() 3528 return s.numRemotes() 3529 } 3530 3531 // numRemotes will report number of registered remotes. 
3532 // Server lock held on entry 3533 func (s *Server) numRemotes() int { 3534 return len(s.routes) 3535 } 3536 3537 // NumLeafNodes will report number of leaf node connections. 3538 func (s *Server) NumLeafNodes() int { 3539 s.mu.RLock() 3540 defer s.mu.RUnlock() 3541 return len(s.leafs) 3542 } 3543 3544 // NumClients will report the number of registered clients. 3545 func (s *Server) NumClients() int { 3546 s.mu.RLock() 3547 defer s.mu.RUnlock() 3548 return len(s.clients) 3549 } 3550 3551 // GetClient will return the client associated with cid. 3552 func (s *Server) GetClient(cid uint64) *client { 3553 return s.getClient(cid) 3554 } 3555 3556 // getClient will return the client associated with cid. 3557 func (s *Server) getClient(cid uint64) *client { 3558 s.mu.RLock() 3559 defer s.mu.RUnlock() 3560 return s.clients[cid] 3561 } 3562 3563 // GetLeafNode returns the leafnode associated with the cid. 3564 func (s *Server) GetLeafNode(cid uint64) *client { 3565 s.mu.RLock() 3566 defer s.mu.RUnlock() 3567 return s.leafs[cid] 3568 } 3569 3570 // NumSubscriptions will report how many subscriptions are active. 3571 func (s *Server) NumSubscriptions() uint32 { 3572 s.mu.RLock() 3573 defer s.mu.RUnlock() 3574 return s.numSubscriptions() 3575 } 3576 3577 // numSubscriptions will report how many subscriptions are active. 3578 // Lock should be held. 3579 func (s *Server) numSubscriptions() uint32 { 3580 var subs int 3581 s.accounts.Range(func(k, v any) bool { 3582 acc := v.(*Account) 3583 subs += acc.TotalSubs() 3584 return true 3585 }) 3586 return uint32(subs) 3587 } 3588 3589 // NumSlowConsumers will report the number of slow consumers. 3590 func (s *Server) NumSlowConsumers() int64 { 3591 return atomic.LoadInt64(&s.slowConsumers) 3592 } 3593 3594 // NumSlowConsumersClients will report the number of slow consumers clients. 
3595 func (s *Server) NumSlowConsumersClients() uint64 { 3596 return s.scStats.clients.Load() 3597 } 3598 3599 // NumSlowConsumersRoutes will report the number of slow consumers routes. 3600 func (s *Server) NumSlowConsumersRoutes() uint64 { 3601 return s.scStats.routes.Load() 3602 } 3603 3604 // NumSlowConsumersGateways will report the number of slow consumers leafs. 3605 func (s *Server) NumSlowConsumersGateways() uint64 { 3606 return s.scStats.gateways.Load() 3607 } 3608 3609 // NumSlowConsumersLeafs will report the number of slow consumers leafs. 3610 func (s *Server) NumSlowConsumersLeafs() uint64 { 3611 return s.scStats.leafs.Load() 3612 } 3613 3614 // ConfigTime will report the last time the server configuration was loaded. 3615 func (s *Server) ConfigTime() time.Time { 3616 s.mu.RLock() 3617 defer s.mu.RUnlock() 3618 return s.configTime 3619 } 3620 3621 // Addr will return the net.Addr object for the current listener. 3622 func (s *Server) Addr() net.Addr { 3623 s.mu.RLock() 3624 defer s.mu.RUnlock() 3625 if s.listener == nil { 3626 return nil 3627 } 3628 return s.listener.Addr() 3629 } 3630 3631 // MonitorAddr will return the net.Addr object for the monitoring listener. 3632 func (s *Server) MonitorAddr() *net.TCPAddr { 3633 s.mu.RLock() 3634 defer s.mu.RUnlock() 3635 if s.http == nil { 3636 return nil 3637 } 3638 return s.http.Addr().(*net.TCPAddr) 3639 } 3640 3641 // ClusterAddr returns the net.Addr object for the route listener. 3642 func (s *Server) ClusterAddr() *net.TCPAddr { 3643 s.mu.RLock() 3644 defer s.mu.RUnlock() 3645 if s.routeListener == nil { 3646 return nil 3647 } 3648 return s.routeListener.Addr().(*net.TCPAddr) 3649 } 3650 3651 // ProfilerAddr returns the net.Addr object for the profiler listener. 
3652 func (s *Server) ProfilerAddr() *net.TCPAddr { 3653 s.mu.RLock() 3654 defer s.mu.RUnlock() 3655 if s.profiler == nil { 3656 return nil 3657 } 3658 return s.profiler.Addr().(*net.TCPAddr) 3659 } 3660 3661 func (s *Server) readyForConnections(d time.Duration) error { 3662 // Snapshot server options. 3663 opts := s.getOpts() 3664 3665 type info struct { 3666 ok bool 3667 err error 3668 } 3669 chk := make(map[string]info) 3670 3671 end := time.Now().Add(d) 3672 for time.Now().Before(end) { 3673 s.mu.RLock() 3674 chk["server"] = info{ok: s.listener != nil || opts.DontListen, err: s.listenerErr} 3675 chk["route"] = info{ok: (opts.Cluster.Port == 0 || s.routeListener != nil), err: s.routeListenerErr} 3676 chk["gateway"] = info{ok: (opts.Gateway.Name == _EMPTY_ || s.gatewayListener != nil), err: s.gatewayListenerErr} 3677 chk["leafnode"] = info{ok: (opts.LeafNode.Port == 0 || s.leafNodeListener != nil), err: s.leafNodeListenerErr} 3678 chk["websocket"] = info{ok: (opts.Websocket.Port == 0 || s.websocket.listener != nil), err: s.websocket.listenerErr} 3679 chk["mqtt"] = info{ok: (opts.MQTT.Port == 0 || s.mqtt.listener != nil), err: s.mqtt.listenerErr} 3680 s.mu.RUnlock() 3681 3682 var numOK int 3683 for _, inf := range chk { 3684 if inf.ok { 3685 numOK++ 3686 } 3687 } 3688 if numOK == len(chk) { 3689 // In the case of DontListen option (no accept loop), we still want 3690 // to make sure that Start() has done all the work, so we wait on 3691 // that. 
3692 if opts.DontListen { 3693 select { 3694 case <-s.startupComplete: 3695 case <-time.After(d): 3696 return fmt.Errorf("failed to be ready for connections after %s: startup did not complete", d) 3697 } 3698 } 3699 return nil 3700 } 3701 if d > 25*time.Millisecond { 3702 time.Sleep(25 * time.Millisecond) 3703 } 3704 } 3705 3706 failed := make([]string, 0, len(chk)) 3707 for name, inf := range chk { 3708 if inf.ok && inf.err != nil { 3709 failed = append(failed, fmt.Sprintf("%s(ok, but %s)", name, inf.err)) 3710 } 3711 if !inf.ok && inf.err == nil { 3712 failed = append(failed, name) 3713 } 3714 if !inf.ok && inf.err != nil { 3715 failed = append(failed, fmt.Sprintf("%s(%s)", name, inf.err)) 3716 } 3717 } 3718 3719 return fmt.Errorf( 3720 "failed to be ready for connections after %s: %s", 3721 d, strings.Join(failed, ", "), 3722 ) 3723 } 3724 3725 // ReadyForConnections returns `true` if the server is ready to accept clients 3726 // and, if routing is enabled, route connections. If after the duration 3727 // `dur` the server is still not ready, returns `false`. 3728 func (s *Server) ReadyForConnections(dur time.Duration) bool { 3729 return s.readyForConnections(dur) == nil 3730 } 3731 3732 // Quick utility to function to tell if the server supports headers. 3733 func (s *Server) supportsHeaders() bool { 3734 if s == nil { 3735 return false 3736 } 3737 return !(s.getOpts().NoHeaderSupport) 3738 } 3739 3740 // ID returns the server's ID 3741 func (s *Server) ID() string { 3742 return s.info.ID 3743 } 3744 3745 // NodeName returns the node name for this server. 3746 func (s *Server) NodeName() string { 3747 return getHash(s.info.Name) 3748 } 3749 3750 // Name returns the server's name. This will be the same as the ID if it was not set. 
3751 func (s *Server) Name() string { 3752 return s.info.Name 3753 } 3754 3755 func (s *Server) String() string { 3756 return s.info.Name 3757 } 3758 3759 type pprofLabels map[string]string 3760 3761 func setGoRoutineLabels(tags ...pprofLabels) { 3762 var labels []string 3763 for _, m := range tags { 3764 for k, v := range m { 3765 labels = append(labels, k, v) 3766 } 3767 } 3768 if len(labels) > 0 { 3769 pprof.SetGoroutineLabels( 3770 pprof.WithLabels(context.Background(), pprof.Labels(labels...)), 3771 ) 3772 } 3773 } 3774 3775 func (s *Server) startGoRoutine(f func(), tags ...pprofLabels) bool { 3776 var started bool 3777 s.grMu.Lock() 3778 defer s.grMu.Unlock() 3779 if s.grRunning { 3780 s.grWG.Add(1) 3781 go func() { 3782 setGoRoutineLabels(tags...) 3783 f() 3784 }() 3785 started = true 3786 } 3787 return started 3788 } 3789 3790 func (s *Server) numClosedConns() int { 3791 s.mu.RLock() 3792 defer s.mu.RUnlock() 3793 return s.closed.len() 3794 } 3795 3796 func (s *Server) totalClosedConns() uint64 { 3797 s.mu.RLock() 3798 defer s.mu.RUnlock() 3799 return s.closed.totalConns() 3800 } 3801 3802 func (s *Server) closedClients() []*closedClient { 3803 s.mu.RLock() 3804 defer s.mu.RUnlock() 3805 return s.closed.closedClients() 3806 } 3807 3808 // getClientConnectURLs returns suitable URLs for clients to connect to the listen 3809 // port based on the server options' Host and Port. If the Host corresponds to 3810 // "any" interfaces, this call returns the list of resolved IP addresses. 3811 // If ClientAdvertise is set, returns the client advertise host and port. 3812 // The server lock is assumed held on entry. 3813 func (s *Server) getClientConnectURLs() []string { 3814 // Snapshot server options. 3815 opts := s.getOpts() 3816 // Ignore error here since we know that if there is client advertise, the 3817 // parseHostPort is correct because we did it right before calling this 3818 // function in Server.New(). 
3819 urls, _ := s.getConnectURLs(opts.ClientAdvertise, opts.Host, opts.Port) 3820 return urls 3821 } 3822 3823 // Generic version that will return an array of URLs based on the given 3824 // advertise, host and port values. 3825 func (s *Server) getConnectURLs(advertise, host string, port int) ([]string, error) { 3826 urls := make([]string, 0, 1) 3827 3828 // short circuit if advertise is set 3829 if advertise != "" { 3830 h, p, err := parseHostPort(advertise, port) 3831 if err != nil { 3832 return nil, err 3833 } 3834 urls = append(urls, net.JoinHostPort(h, strconv.Itoa(p))) 3835 } else { 3836 sPort := strconv.Itoa(port) 3837 _, ips, err := s.getNonLocalIPsIfHostIsIPAny(host, true) 3838 for _, ip := range ips { 3839 urls = append(urls, net.JoinHostPort(ip, sPort)) 3840 } 3841 if err != nil || len(urls) == 0 { 3842 // We are here if s.opts.Host is not "0.0.0.0" nor "::", or if for some 3843 // reason we could not add any URL in the loop above. 3844 // We had a case where a Windows VM was hosed and would have err == nil 3845 // and not add any address in the array in the loop above, and we 3846 // ended-up returning 0.0.0.0, which is problematic for Windows clients. 3847 // Check for 0.0.0.0 or :: specifically, and ignore if that's the case. 3848 if host == "0.0.0.0" || host == "::" { 3849 s.Errorf("Address %q can not be resolved properly", host) 3850 } else { 3851 urls = append(urls, net.JoinHostPort(host, sPort)) 3852 } 3853 } 3854 } 3855 return urls, nil 3856 } 3857 3858 // Returns an array of non local IPs if the provided host is 3859 // 0.0.0.0 or ::. It returns the first resolved if `all` is 3860 // false. 3861 // The boolean indicate if the provided host was 0.0.0.0 (or ::) 3862 // so that if the returned array is empty caller can decide 3863 // what to do next. 
3864 func (s *Server) getNonLocalIPsIfHostIsIPAny(host string, all bool) (bool, []string, error) { 3865 ip := net.ParseIP(host) 3866 // If this is not an IP, we are done 3867 if ip == nil { 3868 return false, nil, nil 3869 } 3870 // If this is not 0.0.0.0 or :: we have nothing to do. 3871 if !ip.IsUnspecified() { 3872 return false, nil, nil 3873 } 3874 s.Debugf("Get non local IPs for %q", host) 3875 var ips []string 3876 ifaces, _ := net.Interfaces() 3877 for _, i := range ifaces { 3878 addrs, _ := i.Addrs() 3879 for _, addr := range addrs { 3880 switch v := addr.(type) { 3881 case *net.IPNet: 3882 ip = v.IP 3883 case *net.IPAddr: 3884 ip = v.IP 3885 } 3886 ipStr := ip.String() 3887 // Skip non global unicast addresses 3888 if !ip.IsGlobalUnicast() || ip.IsUnspecified() { 3889 ip = nil 3890 continue 3891 } 3892 s.Debugf(" ip=%s", ipStr) 3893 ips = append(ips, ipStr) 3894 if !all { 3895 break 3896 } 3897 } 3898 } 3899 return true, ips, nil 3900 } 3901 3902 // if the ip is not specified, attempt to resolve it 3903 func resolveHostPorts(addr net.Listener) []string { 3904 hostPorts := make([]string, 0) 3905 hp := addr.Addr().(*net.TCPAddr) 3906 port := strconv.Itoa(hp.Port) 3907 if hp.IP.IsUnspecified() { 3908 var ip net.IP 3909 ifaces, _ := net.Interfaces() 3910 for _, i := range ifaces { 3911 addrs, _ := i.Addrs() 3912 for _, addr := range addrs { 3913 switch v := addr.(type) { 3914 case *net.IPNet: 3915 ip = v.IP 3916 hostPorts = append(hostPorts, net.JoinHostPort(ip.String(), port)) 3917 case *net.IPAddr: 3918 ip = v.IP 3919 hostPorts = append(hostPorts, net.JoinHostPort(ip.String(), port)) 3920 default: 3921 continue 3922 } 3923 } 3924 } 3925 } else { 3926 hostPorts = append(hostPorts, net.JoinHostPort(hp.IP.String(), port)) 3927 } 3928 return hostPorts 3929 } 3930 3931 // format the address of a net.Listener with a protocol 3932 func formatURL(protocol string, addr net.Listener) []string { 3933 hostports := resolveHostPorts(addr) 3934 for i, hp := range hostports 
{ 3935 hostports[i] = fmt.Sprintf("%s://%s", protocol, hp) 3936 } 3937 return hostports 3938 } 3939 3940 // Ports describes URLs that the server can be contacted in 3941 type Ports struct { 3942 Nats []string `json:"nats,omitempty"` 3943 Monitoring []string `json:"monitoring,omitempty"` 3944 Cluster []string `json:"cluster,omitempty"` 3945 Profile []string `json:"profile,omitempty"` 3946 WebSocket []string `json:"websocket,omitempty"` 3947 } 3948 3949 // PortsInfo attempts to resolve all the ports. If after maxWait the ports are not 3950 // resolved, it returns nil. Otherwise it returns a Ports struct 3951 // describing ports where the server can be contacted 3952 func (s *Server) PortsInfo(maxWait time.Duration) *Ports { 3953 if s.readyForListeners(maxWait) { 3954 opts := s.getOpts() 3955 3956 s.mu.RLock() 3957 tls := s.info.TLSRequired 3958 listener := s.listener 3959 httpListener := s.http 3960 clusterListener := s.routeListener 3961 profileListener := s.profiler 3962 wsListener := s.websocket.listener 3963 wss := s.websocket.tls 3964 s.mu.RUnlock() 3965 3966 ports := Ports{} 3967 3968 if listener != nil { 3969 natsProto := "nats" 3970 if tls { 3971 natsProto = "tls" 3972 } 3973 ports.Nats = formatURL(natsProto, listener) 3974 } 3975 3976 if httpListener != nil { 3977 monProto := "http" 3978 if opts.HTTPSPort != 0 { 3979 monProto = "https" 3980 } 3981 ports.Monitoring = formatURL(monProto, httpListener) 3982 } 3983 3984 if clusterListener != nil { 3985 clusterProto := "nats" 3986 if opts.Cluster.TLSConfig != nil { 3987 clusterProto = "tls" 3988 } 3989 ports.Cluster = formatURL(clusterProto, clusterListener) 3990 } 3991 3992 if profileListener != nil { 3993 ports.Profile = formatURL("http", profileListener) 3994 } 3995 3996 if wsListener != nil { 3997 protocol := wsSchemePrefix 3998 if wss { 3999 protocol = wsSchemePrefixTLS 4000 } 4001 ports.WebSocket = formatURL(protocol, wsListener) 4002 } 4003 4004 return &ports 4005 } 4006 4007 return nil 4008 } 4009 4010 // 
Returns the portsFile. If a non-empty dirHint is provided, the dirHint 4011 // path is used instead of the server option value 4012 func (s *Server) portFile(dirHint string) string { 4013 dirname := s.getOpts().PortsFileDir 4014 if dirHint != "" { 4015 dirname = dirHint 4016 } 4017 if dirname == _EMPTY_ { 4018 return _EMPTY_ 4019 } 4020 return filepath.Join(dirname, fmt.Sprintf("%s_%d.ports", filepath.Base(os.Args[0]), os.Getpid())) 4021 } 4022 4023 // Delete the ports file. If a non-empty dirHint is provided, the dirHint 4024 // path is used instead of the server option value 4025 func (s *Server) deletePortsFile(hintDir string) { 4026 portsFile := s.portFile(hintDir) 4027 if portsFile != "" { 4028 if err := os.Remove(portsFile); err != nil { 4029 s.Errorf("Error cleaning up ports file %s: %v", portsFile, err) 4030 } 4031 } 4032 } 4033 4034 // Writes a file with a serialized Ports to the specified ports_file_dir. 4035 // The name of the file is `exename_pid.ports`, typically nats-server_pid.ports. 
4036 // if ports file is not set, this function has no effect 4037 func (s *Server) logPorts() { 4038 opts := s.getOpts() 4039 portsFile := s.portFile(opts.PortsFileDir) 4040 if portsFile != _EMPTY_ { 4041 go func() { 4042 info := s.PortsInfo(5 * time.Second) 4043 if info == nil { 4044 s.Errorf("Unable to resolve the ports in the specified time") 4045 return 4046 } 4047 data, err := json.Marshal(info) 4048 if err != nil { 4049 s.Errorf("Error marshaling ports file: %v", err) 4050 return 4051 } 4052 if err := os.WriteFile(portsFile, data, 0666); err != nil { 4053 s.Errorf("Error writing ports file (%s): %v", portsFile, err) 4054 return 4055 } 4056 4057 }() 4058 } 4059 } 4060 4061 // waits until a calculated list of listeners is resolved or a timeout 4062 func (s *Server) readyForListeners(dur time.Duration) bool { 4063 end := time.Now().Add(dur) 4064 for time.Now().Before(end) { 4065 s.mu.RLock() 4066 listeners := s.serviceListeners() 4067 s.mu.RUnlock() 4068 if len(listeners) == 0 { 4069 return false 4070 } 4071 4072 ok := true 4073 for _, l := range listeners { 4074 if l == nil { 4075 ok = false 4076 break 4077 } 4078 } 4079 if ok { 4080 return true 4081 } 4082 select { 4083 case <-s.quitCh: 4084 return false 4085 case <-time.After(25 * time.Millisecond): 4086 // continue - unable to select from quit - we are still running 4087 } 4088 } 4089 return false 4090 } 4091 4092 // returns a list of listeners that are intended for the process 4093 // if the entry is nil, the interface is yet to be resolved 4094 func (s *Server) serviceListeners() []net.Listener { 4095 listeners := make([]net.Listener, 0) 4096 opts := s.getOpts() 4097 listeners = append(listeners, s.listener) 4098 if opts.Cluster.Port != 0 { 4099 listeners = append(listeners, s.routeListener) 4100 } 4101 if opts.HTTPPort != 0 || opts.HTTPSPort != 0 { 4102 listeners = append(listeners, s.http) 4103 } 4104 if opts.ProfPort != 0 { 4105 listeners = append(listeners, s.profiler) 4106 } 4107 if 
opts.Websocket.Port != 0 { 4108 listeners = append(listeners, s.websocket.listener) 4109 } 4110 return listeners 4111 } 4112 4113 // Returns true if in lame duck mode. 4114 func (s *Server) isLameDuckMode() bool { 4115 s.mu.RLock() 4116 defer s.mu.RUnlock() 4117 return s.ldm 4118 } 4119 4120 // This function will close the client listener then close the clients 4121 // at some interval to avoid a reconnect storm. 4122 // We will also transfer any raft leaders and shutdown JetStream. 4123 func (s *Server) lameDuckMode() { 4124 s.mu.Lock() 4125 // Check if there is actually anything to do 4126 if s.isShuttingDown() || s.ldm || s.listener == nil { 4127 s.mu.Unlock() 4128 return 4129 } 4130 s.Noticef("Entering lame duck mode, stop accepting new clients") 4131 s.ldm = true 4132 s.sendLDMShutdownEventLocked() 4133 expected := 1 4134 s.listener.Close() 4135 s.listener = nil 4136 if s.websocket.server != nil { 4137 expected++ 4138 s.websocket.server.Close() 4139 s.websocket.server = nil 4140 s.websocket.listener = nil 4141 } 4142 s.ldmCh = make(chan bool, expected) 4143 opts := s.getOpts() 4144 gp := opts.LameDuckGracePeriod 4145 // For tests, we want the grace period to be in some cases bigger 4146 // than the ldm duration, so to by-pass the validateOptions() check, 4147 // we use negative number and flip it here. 4148 if gp < 0 { 4149 gp *= -1 4150 } 4151 s.mu.Unlock() 4152 4153 // If we are running any raftNodes transfer leaders. 4154 if hadTransfers := s.transferRaftLeaders(); hadTransfers { 4155 // They will transfer leadership quickly, but wait here for a second. 4156 select { 4157 case <-time.After(time.Second): 4158 case <-s.quitCh: 4159 return 4160 } 4161 } 4162 4163 // Now check and shutdown jetstream. 
4164 s.shutdownJetStream() 4165 4166 // Now shutdown the nodes 4167 s.shutdownRaftNodes() 4168 4169 // Wait for accept loops to be done to make sure that no new 4170 // client can connect 4171 for i := 0; i < expected; i++ { 4172 <-s.ldmCh 4173 } 4174 4175 s.mu.Lock() 4176 // Need to recheck few things 4177 if s.isShuttingDown() || len(s.clients) == 0 { 4178 s.mu.Unlock() 4179 // If there is no client, we need to call Shutdown() to complete 4180 // the LDMode. If server has been shutdown while lock was released, 4181 // calling Shutdown() should be no-op. 4182 s.Shutdown() 4183 return 4184 } 4185 dur := int64(opts.LameDuckDuration) 4186 dur -= int64(gp) 4187 if dur <= 0 { 4188 dur = int64(time.Second) 4189 } 4190 numClients := int64(len(s.clients)) 4191 batch := 1 4192 // Sleep interval between each client connection close. 4193 var si int64 4194 if numClients != 0 { 4195 si = dur / numClients 4196 } 4197 if si < 1 { 4198 // Should not happen (except in test with very small LD duration), but 4199 // if there are too many clients, batch the number of close and 4200 // use a tiny sleep interval that will result in yield likely. 4201 si = 1 4202 batch = int(numClients / dur) 4203 } else if si > int64(time.Second) { 4204 // Conversely, there is no need to sleep too long between clients 4205 // and spread say 10 clients for the 2min duration. Sleeping no 4206 // more than 1sec. 4207 si = int64(time.Second) 4208 } 4209 4210 // Now capture all clients 4211 clients := make([]*client, 0, len(s.clients)) 4212 for _, client := range s.clients { 4213 clients = append(clients, client) 4214 } 4215 // Now that we know that no new client can be accepted, 4216 // send INFO to routes and clients to notify this state. 
4217 s.sendLDMToRoutes() 4218 s.sendLDMToClients() 4219 s.mu.Unlock() 4220 4221 t := time.NewTimer(gp) 4222 // Delay start of closing of client connections in case 4223 // we have several servers that we want to signal to enter LD mode 4224 // and not have their client reconnect to each other. 4225 select { 4226 case <-t.C: 4227 s.Noticef("Closing existing clients") 4228 case <-s.quitCh: 4229 t.Stop() 4230 return 4231 } 4232 for i, client := range clients { 4233 client.closeConnection(ServerShutdown) 4234 if i == len(clients)-1 { 4235 break 4236 } 4237 if batch == 1 || i%batch == 0 { 4238 // We pick a random interval which will be at least si/2 4239 v := rand.Int63n(si) 4240 if v < si/2 { 4241 v = si / 2 4242 } 4243 t.Reset(time.Duration(v)) 4244 // Sleep for given interval or bail out if kicked by Shutdown(). 4245 select { 4246 case <-t.C: 4247 case <-s.quitCh: 4248 t.Stop() 4249 return 4250 } 4251 } 4252 } 4253 s.Shutdown() 4254 } 4255 4256 // Send an INFO update to routes with the indication that this server is in LDM mode. 4257 // Server lock is held on entry. 4258 func (s *Server) sendLDMToRoutes() { 4259 s.routeInfo.LameDuckMode = true 4260 infoJSON := generateInfoJSON(&s.routeInfo) 4261 s.forEachRemote(func(r *client) { 4262 r.mu.Lock() 4263 r.enqueueProto(infoJSON) 4264 r.mu.Unlock() 4265 }) 4266 // Clear now so that we notify only once, should we have to send other INFOs. 4267 s.routeInfo.LameDuckMode = false 4268 } 4269 4270 // Send an INFO update to clients with the indication that this server is in 4271 // LDM mode and with only URLs of other nodes. 4272 // Server lock is held on entry. 4273 func (s *Server) sendLDMToClients() { 4274 s.info.LameDuckMode = true 4275 // Clear this so that if there are further updates, we don't send our URLs. 4276 s.clientConnectURLs = s.clientConnectURLs[:0] 4277 if s.websocket.connectURLs != nil { 4278 s.websocket.connectURLs = s.websocket.connectURLs[:0] 4279 } 4280 // Reset content first. 
4281 s.info.ClientConnectURLs = s.info.ClientConnectURLs[:0] 4282 s.info.WSConnectURLs = s.info.WSConnectURLs[:0] 4283 // Only add the other nodes if we are allowed to. 4284 if !s.getOpts().Cluster.NoAdvertise { 4285 for url := range s.clientConnectURLsMap { 4286 s.info.ClientConnectURLs = append(s.info.ClientConnectURLs, url) 4287 } 4288 for url := range s.websocket.connectURLsMap { 4289 s.info.WSConnectURLs = append(s.info.WSConnectURLs, url) 4290 } 4291 } 4292 // Send to all registered clients that support async INFO protocols. 4293 s.sendAsyncInfoToClients(true, true) 4294 // We now clear the info.LameDuckMode flag so that if there are 4295 // cluster updates and we send the INFO, we don't have the boolean 4296 // set which would cause multiple LDM notifications to clients. 4297 s.info.LameDuckMode = false 4298 } 4299 4300 // If given error is a net.Error and is temporary, sleeps for the given 4301 // delay and double it, but cap it to ACCEPT_MAX_SLEEP. The sleep is 4302 // interrupted if the server is shutdown. 4303 // An error message is displayed depending on the type of error. 4304 // Returns the new (or unchanged) delay, or a negative value if the 4305 // server has been or is being shutdown. 4306 func (s *Server) acceptError(acceptName string, err error, tmpDelay time.Duration) time.Duration { 4307 if !s.isRunning() { 4308 return -1 4309 } 4310 //lint:ignore SA1019 We want to retry on a bunch of errors here. 
4311 if ne, ok := err.(net.Error); ok && ne.Temporary() { // nolint:staticcheck 4312 s.Errorf("Temporary %s Accept Error(%v), sleeping %dms", acceptName, ne, tmpDelay/time.Millisecond) 4313 select { 4314 case <-time.After(tmpDelay): 4315 case <-s.quitCh: 4316 return -1 4317 } 4318 tmpDelay *= 2 4319 if tmpDelay > ACCEPT_MAX_SLEEP { 4320 tmpDelay = ACCEPT_MAX_SLEEP 4321 } 4322 } else { 4323 s.Errorf("%s Accept error: %v", acceptName, err) 4324 } 4325 return tmpDelay 4326 } 4327 4328 var errNoIPAvail = errors.New("no IP available") 4329 4330 func (s *Server) getRandomIP(resolver netResolver, url string, excludedAddresses map[string]struct{}) (string, error) { 4331 host, port, err := net.SplitHostPort(url) 4332 if err != nil { 4333 return "", err 4334 } 4335 // If already an IP, skip. 4336 if net.ParseIP(host) != nil { 4337 return url, nil 4338 } 4339 ips, err := resolver.LookupHost(context.Background(), host) 4340 if err != nil { 4341 return "", fmt.Errorf("lookup for host %q: %v", host, err) 4342 } 4343 if len(excludedAddresses) > 0 { 4344 for i := 0; i < len(ips); i++ { 4345 ip := ips[i] 4346 addr := net.JoinHostPort(ip, port) 4347 if _, excluded := excludedAddresses[addr]; excluded { 4348 if len(ips) == 1 { 4349 ips = nil 4350 break 4351 } 4352 ips[i] = ips[len(ips)-1] 4353 ips = ips[:len(ips)-1] 4354 i-- 4355 } 4356 } 4357 if len(ips) == 0 { 4358 return "", errNoIPAvail 4359 } 4360 } 4361 var address string 4362 if len(ips) == 0 { 4363 s.Warnf("Unable to get IP for %s, will try with %s: %v", host, url, err) 4364 address = url 4365 } else { 4366 var ip string 4367 if len(ips) == 1 { 4368 ip = ips[0] 4369 } else { 4370 ip = ips[rand.Int31n(int32(len(ips)))] 4371 } 4372 // add the port 4373 address = net.JoinHostPort(ip, port) 4374 } 4375 return address, nil 4376 } 4377 4378 // Returns true for the first attempt and depending on the nature 4379 // of the attempt (first connect or a reconnect), when the number 4380 // of attempts is equal to the configured report 
attempts. 4381 func (s *Server) shouldReportConnectErr(firstConnect bool, attempts int) bool { 4382 opts := s.getOpts() 4383 if firstConnect { 4384 if attempts == 1 || attempts%opts.ConnectErrorReports == 0 { 4385 return true 4386 } 4387 return false 4388 } 4389 if attempts == 1 || attempts%opts.ReconnectErrorReports == 0 { 4390 return true 4391 } 4392 return false 4393 } 4394 4395 func (s *Server) updateRemoteSubscription(acc *Account, sub *subscription, delta int32) { 4396 s.updateRouteSubscriptionMap(acc, sub, delta) 4397 if s.gateway.enabled { 4398 s.gatewayUpdateSubInterest(acc.Name, sub, delta) 4399 } 4400 4401 acc.updateLeafNodes(sub, delta) 4402 } 4403 4404 func (s *Server) startRateLimitLogExpiration() { 4405 interval := time.Second 4406 s.startGoRoutine(func() { 4407 defer s.grWG.Done() 4408 4409 ticker := time.NewTicker(time.Second) 4410 defer ticker.Stop() 4411 for { 4412 select { 4413 case <-s.quitCh: 4414 return 4415 case interval = <-s.rateLimitLoggingCh: 4416 ticker.Reset(interval) 4417 case <-ticker.C: 4418 s.rateLimitLogging.Range(func(k, v any) bool { 4419 start := v.(time.Time) 4420 if time.Since(start) >= interval { 4421 s.rateLimitLogging.Delete(k) 4422 } 4423 return true 4424 }) 4425 } 4426 } 4427 }) 4428 } 4429 4430 func (s *Server) changeRateLimitLogInterval(d time.Duration) { 4431 if d <= 0 { 4432 return 4433 } 4434 select { 4435 case s.rateLimitLoggingCh <- d: 4436 default: 4437 } 4438 } 4439 4440 // DisconnectClientByID disconnects a client by connection ID 4441 func (s *Server) DisconnectClientByID(id uint64) error { 4442 client := s.clients[id] 4443 if client != nil { 4444 client.closeConnection(Kicked) 4445 return nil 4446 } 4447 return errors.New("no such client id") 4448 } 4449 4450 // LDMClientByID sends a Lame Duck Mode info message to a client by connection ID 4451 func (s *Server) LDMClientByID(id uint64) error { 4452 info := s.copyInfo() 4453 info.LameDuckMode = true 4454 4455 c := s.clients[id] 4456 if c != nil { 4457 
c.mu.Lock() 4458 defer c.mu.Unlock() 4459 if c.opts.Protocol >= ClientProtoInfo && 4460 c.flags.isSet(firstPongSent) { 4461 // sendInfo takes care of checking if the connection is still 4462 // valid or not, so don't duplicate tests here. 4463 c.Debugf("sending Lame Duck Mode info to client") 4464 c.enqueueProto(c.generateClientInfoJSON(info)) 4465 return nil 4466 } else { 4467 return errors.New("ClientProtoInfo < ClientOps.Protocol or first pong not sent") 4468 } 4469 } 4470 return errors.New("no such client id") 4471 }