get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/server/server.go (about) 1 // Copyright 2012-2024 The NATS Authors 2 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // you may not use this file except in compliance with the License. 4 // You may obtain a copy of the License at 5 // 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package server 15 16 import ( 17 "bytes" 18 "context" 19 "crypto/tls" 20 "encoding/json" 21 "errors" 22 "flag" 23 "fmt" 24 "hash/fnv" 25 "io" 26 "log" 27 "math/rand" 28 "net" 29 "net/http" 30 "regexp" 31 "runtime/pprof" 32 33 // Allow dynamic profiling. 34 _ "net/http/pprof" 35 "os" 36 "path" 37 "path/filepath" 38 "runtime" 39 "strconv" 40 "strings" 41 "sync" 42 "sync/atomic" 43 "time" 44 45 "github.com/klauspost/compress/s2" 46 "github.com/nats-io/jwt/v2" 47 "github.com/nats-io/nkeys" 48 "github.com/nats-io/nuid" 49 50 "get.pme.sh/pnats/logger" 51 ) 52 53 const ( 54 // Interval for the first PING for non client connections. 55 firstPingInterval = time.Second 56 57 // This is for the first ping for client connections. 58 firstClientPingInterval = 2 * time.Second 59 ) 60 61 // These are protocol versions sent between server connections: ROUTER, LEAF and 62 // GATEWAY. We may have protocol versions that have a meaning only for a certain 63 // type of connections, but we don't have to have separate enums for that. 64 // However, it is CRITICAL to not change the order of those constants since they 65 // are exchanged between servers. When adding a new protocol version, add to the 66 // end of the list, don't try to group them by connection types. 
67 const ( 68 // RouteProtoZero is the original Route protocol from 2009. 69 // http://nats.io/documentation/internals/nats-protocol/ 70 RouteProtoZero = iota 71 // RouteProtoInfo signals a route can receive more then the original INFO block. 72 // This can be used to update remote cluster permissions, etc... 73 RouteProtoInfo 74 // RouteProtoV2 is the new route/cluster protocol that provides account support. 75 RouteProtoV2 76 // MsgTraceProto indicates that this server understands distributed message tracing. 77 MsgTraceProto 78 ) 79 80 // Will return the latest server-to-server protocol versions, unless the 81 // option to override it is set. 82 func (s *Server) getServerProto() int { 83 opts := s.getOpts() 84 // Initialize with the latest protocol version. 85 proto := MsgTraceProto 86 // For tests, we want to be able to make this server behave 87 // as an older server so check this option to see if we should override. 88 if opts.overrideProto < 0 { 89 // The option overrideProto is set to 0 by default (when creating an 90 // Options structure). Since this is the same value than the original 91 // proto RouteProtoZero, tests call setServerProtoForTest() with the 92 // desired protocol level, which sets it as negative value equal to: 93 // (wantedProto + 1) * -1. Here we compute back the real value. 94 proto = (opts.overrideProto * -1) - 1 95 } 96 return proto 97 } 98 99 // Used by tests. 100 func setServerProtoForTest(wantedProto int) int { 101 return (wantedProto + 1) * -1 102 } 103 104 // Info is the information sent to clients, routes, gateways, and leaf nodes, 105 // to help them understand information about this server. 
//
// Fields whose tag carries `omitempty` are left out of the JSON encoding when
// they hold their zero value, so a receiver cannot distinguish "unset" from
// "explicitly zero" for those fields.
type Info struct {
	ID                string   `json:"server_id"`
	Name              string   `json:"server_name"`
	Version           string   `json:"version"`
	Proto             int      `json:"proto"`
	GitCommit         string   `json:"git_commit,omitempty"`
	GoVersion         string   `json:"go"`
	Host              string   `json:"host"`
	Port              int      `json:"port"`
	Headers           bool     `json:"headers"`
	AuthRequired      bool     `json:"auth_required,omitempty"`
	TLSRequired       bool     `json:"tls_required,omitempty"`
	TLSVerify         bool     `json:"tls_verify,omitempty"`
	TLSAvailable      bool     `json:"tls_available,omitempty"`
	MaxPayload        int32    `json:"max_payload"`
	JetStream         bool     `json:"jetstream,omitempty"`
	IP                string   `json:"ip,omitempty"`
	CID               uint64   `json:"client_id,omitempty"`
	ClientIP          string   `json:"client_ip,omitempty"`
	Nonce             string   `json:"nonce,omitempty"`
	Cluster           string   `json:"cluster,omitempty"`
	Dynamic           bool     `json:"cluster_dynamic,omitempty"`
	Domain            string   `json:"domain,omitempty"`
	ClientConnectURLs []string `json:"connect_urls,omitempty"`    // Contains URLs a client can connect to.
	WSConnectURLs     []string `json:"ws_connect_urls,omitempty"` // Contains URLs a ws client can connect to.
	LameDuckMode      bool     `json:"ldm,omitempty"`
	Compression       string   `json:"compression,omitempty"`

	// Route Specific
	Import        *SubjectPermission `json:"import,omitempty"`
	Export        *SubjectPermission `json:"export,omitempty"`
	LNOC          bool               `json:"lnoc,omitempty"`
	InfoOnConnect bool               `json:"info_on_connect,omitempty"` // When true the server will respond to CONNECT with an INFO
	ConnectInfo   bool               `json:"connect_info,omitempty"`    // When true this is the server INFO response to CONNECT
	RoutePoolSize int                `json:"route_pool_size,omitempty"`
	RoutePoolIdx  int                `json:"route_pool_idx,omitempty"`
	RouteAccount  string             `json:"route_account,omitempty"`
	RouteAccReqID string             `json:"route_acc_add_reqid,omitempty"`

	// Gateways Specific
	Gateway           string   `json:"gateway,omitempty"`             // Name of the origin Gateway (sent by gateway's INFO)
	GatewayURLs       []string `json:"gateway_urls,omitempty"`        // Gateway URLs in the originating cluster (sent by gateway's INFO)
	GatewayURL        string   `json:"gateway_url,omitempty"`         // Gateway URL on that server (sent by route's INFO)
	GatewayCmd        byte     `json:"gateway_cmd,omitempty"`         // Command code for the receiving server to know what to do
	GatewayCmdPayload []byte   `json:"gateway_cmd_payload,omitempty"` // Command payload when needed
	GatewayNRP        bool     `json:"gateway_nrp,omitempty"`         // Uses new $GNR. prefix for mapped replies
	GatewayIOM        bool     `json:"gateway_iom,omitempty"`         // Indicate that all accounts will be switched to InterestOnly mode "right away"

	// LeafNode Specific
	LeafNodeURLs  []string `json:"leafnode_urls,omitempty"`  // LeafNode URLs that the server can reconnect to.
	RemoteAccount string   `json:"remote_account,omitempty"` // Lets the other side know the remote account that they bind to.

	XKey string `json:"xkey,omitempty"` // Public server's x25519 key.
}

// Server is our main struct.
type Server struct {
	// Fields accessed with atomic operations need to be 64-bit aligned.
	// Keep them first in the struct; do not reorder.
	gcid uint64
	// How often user logon fails due to the issuer account not being pinned.
	pinnedAccFail uint64
	stats
	scStats
	mu                  sync.RWMutex
	kp                  nkeys.KeyPair
	xkp                 nkeys.KeyPair
	xpub                string
	info                Info
	configFile          string
	optsMu              sync.RWMutex
	opts                *Options
	running             atomic.Bool
	shutdown            atomic.Bool
	listener            net.Listener
	listenerErr         error
	gacc                *Account
	sys                 *internal
	js                  atomic.Pointer[jetStream]
	isMetaLeader        atomic.Bool
	accounts            sync.Map
	tmpAccounts         sync.Map // Temporarily stores accounts that are being built
	activeAccounts      int32
	accResolver         AccountResolver
	clients             map[uint64]*client
	routes              map[string][]*client
	routesPoolSize      int                           // Configured pool size
	routesReject        bool                          // During reload, we may want to reject adding routes until some conditions are met
	routesNoPool        int                           // Number of routes that don't use pooling (connecting to older server for instance)
	accRoutes           map[string]map[string]*client // Key is account name, value is key=remoteID/value=route connection
	accRouteByHash      sync.Map                      // Key is account name, value is nil or a pool index
	accAddedCh          chan struct{}
	accAddedReqID       string
	leafs               map[uint64]*client
	users               map[string]*User
	nkeys               map[string]*NkeyUser
	totalClients        uint64
	closed              *closedRingBuffer
	done                chan bool
	start               time.Time
	http                net.Listener
	httpHandler         http.Handler
	httpBasePath        string
	profiler            net.Listener
	httpReqStats        map[string]uint64
	routeListener       net.Listener
	routeListenerErr    error
	routeInfo           Info
	routeResolver       netResolver
	routesToSelf        map[string]struct{}
	routeTLSName        string
	leafNodeListener    net.Listener
	leafNodeListenerErr error
	leafNodeInfo        Info
	leafNodeInfoJSON    []byte
	leafURLsMap         refCountedUrlSet
	leafNodeOpts        struct {
		resolver    netResolver
		dialTimeout time.Duration
	}
	leafRemoteCfgs     []*leafNodeCfg
	leafRemoteAccounts sync.Map
	leafNodeEnabled    bool
	leafDisableConnect bool // Used in test only

	quitCh           chan struct{}
	startupComplete  chan struct{}
	shutdownComplete chan struct{}

	// Tracking Go routines
	grMu         sync.Mutex
	grTmpClients map[uint64]*client
	grRunning    bool
	grWG         sync.WaitGroup // to wait on various go routines

	cproto     int64     // number of clients supporting async INFO
	configTime time.Time // last time config was loaded

	logging struct {
		sync.RWMutex
		logger      Logger
		trace       int32
		debug       int32
		traceSysAcc int32
	}

	clientConnectURLs []string

	// Used internally for quick look-ups.
	clientConnectURLsMap refCountedUrlSet

	lastCURLsUpdate int64

	// For Gateways
	gatewayListener    net.Listener // Accept listener
	gatewayListenerErr error
	gateway            *srvGateway

	// Used by tests to check that http.Servers do
	// not set any timeout.
	monitoringServer *http.Server
	profilingServer  *http.Server

	// LameDuck mode
	ldm   bool
	ldmCh chan bool

	// Trusted public operator keys.
	trustedKeys []string
	// map of trusted keys to operator setting StrictSigningKeyUsage
	strictSigningKeyUsage map[string]struct{}

	// We use this to minimize mem copies for requests to monitoring
	// endpoint /varz (when it comes from http).
	varzMu sync.Mutex
	varz   *Varz
	// This is set during a config reload if we detect that we have
	// added/removed routes. The monitoring code then checks that
	// to know if it should update the cluster's URLs array.
	varzUpdateRouteURLs bool

	// Keeps a sublist of subscriptions attached to leafnode connections
	// for the $GNR.*.*.*.> subject so that a server can send back a mapped
	// gateway reply.
	gwLeafSubs *Sublist

	// Used for expiration of mapped GW replies
	gwrm struct {
		w  int32
		ch chan time.Duration
		m  sync.Map
	}

	// For eventIDs
	eventIds *nuid.NUID

	// Websocket structure
	websocket srvWebsocket

	// MQTT structure
	mqtt srvMQTT

	// OCSP monitoring
	ocsps []*OCSPMonitor

	// OCSP peer verification (at least one TLS block)
	ocspPeerVerify bool

	// OCSP response cache
	ocsprc OCSPResponseCache

	// exporting account name the importer experienced issues with
	incompleteAccExporterMap sync.Map

	// Holds cluster name under different lock for mapping
	cnMu sync.RWMutex
	cn   string

	// For registering raft nodes with the server.
	rnMu      sync.RWMutex
	raftNodes map[string]RaftNode

	// For mapping from a raft node name back to a server name and cluster. Node has to be in the same domain.
	nodeToInfo sync.Map

	// For out of resources to not log errors too fast.
	rerrMu   sync.Mutex
	rerrLast time.Time

	connRateCounter *rateCounter

	// If there is a system account configured, to still support the $G account,
	// the server will create a fake user and add it to the list of users.
	// Keep track of what that user name is for config reload purposes.
	sysAccOnlyNoAuthUser string

	// IPQueues map
	ipQueues sync.Map

	// To limit logging frequency
	rateLimitLogging   sync.Map
	rateLimitLoggingCh chan time.Duration

	// Total outstanding catchup bytes in flight.
	gcbMu     sync.RWMutex
	gcbOut    int64
	gcbOutMax int64 // Taken from JetStreamMaxCatchup or defaultMaxTotalCatchupOutBytes
	// A global channel to kick out stalled catchup sequences.
	gcbKick chan struct{}

	// Total outbound syncRequests
	syncOutSem chan struct{}

	// Queue to process JS API requests that come from routes (or gateways)
	jsAPIRoutedReqs *ipQueue[*jsAPIRoutedReq]

	// Dial and listen overrides.
	network NetworkIntercept
}

// For tracking JS nodes.
// NOTE: NewServer builds this struct with an unkeyed (positional) literal,
// so adding, removing or reordering fields requires updating that literal.
type nodeInfo struct {
	name            string
	version         string
	cluster         string
	domain          string
	id              string
	tags            jwt.TagList
	cfg             *JetStreamConfig
	stats           *JetStreamStats
	offline         bool
	js              bool
	binarySnapshots bool
}

// Make sure all are 64bits for atomic use
type stats struct {
	inMsgs        int64
	outMsgs       int64
	inBytes       int64
	outBytes      int64
	slowConsumers int64
}

// scStats includes the total and per connection counters of Slow Consumers.
type scStats struct {
	clients  atomic.Uint64
	routes   atomic.Uint64
	leafs    atomic.Uint64
	gateways atomic.Uint64
}

// This is used by tests so we can run all server tests with a default route
// or leafnode compression mode. For instance:
// go test -race -v ./server -cluster_compression=fast
var (
	testDefaultClusterCompression  string
	testDefaultLeafNodeCompression string
)

// Compression modes.
const (
	CompressionNotSupported   = "not supported"
	CompressionOff            = "off"
	CompressionAccept         = "accept"
	CompressionS2Auto         = "s2_auto"
	CompressionS2Uncompressed = "s2_uncompressed"
	CompressionS2Fast         = "s2_fast"
	CompressionS2Better       = "s2_better"
	CompressionS2Best         = "s2_best"
)

// defaultCompressionS2AutoRTTThresholds is the default of RTT thresholds for
// the CompressionS2Auto mode.
419 var defaultCompressionS2AutoRTTThresholds = []time.Duration{ 420 // [0..10ms] -> CompressionS2Uncompressed 421 10 * time.Millisecond, 422 // ]10ms..50ms] -> CompressionS2Fast 423 50 * time.Millisecond, 424 // ]50ms..100ms] -> CompressionS2Better 425 100 * time.Millisecond, 426 // ]100ms..] -> CompressionS2Best 427 } 428 429 // For a given user provided string, matches to one of the compression mode 430 // constant and updates the provided string to that constant. Returns an 431 // error if the provided compression mode is not known. 432 // The parameter `chosenModeForOn` indicates which compression mode to use 433 // when the user selects "on" (or enabled, true, etc..). This is because 434 // we may have different defaults depending on where the compression is used. 435 func validateAndNormalizeCompressionOption(c *CompressionOpts, chosenModeForOn string) error { 436 if c == nil { 437 return nil 438 } 439 cmtl := strings.ToLower(c.Mode) 440 // First, check for the "on" case so that we set to the default compression 441 // mode for that. The other switch/case will finish setup if needed (for 442 // instance if the default mode is s2Auto). 443 switch cmtl { 444 case "on", "enabled", "true": 445 cmtl = chosenModeForOn 446 default: 447 } 448 // Check (again) with the proper mode. 449 switch cmtl { 450 case "not supported", "not_supported": 451 c.Mode = CompressionNotSupported 452 case "disabled", "off", "false": 453 c.Mode = CompressionOff 454 case "accept": 455 c.Mode = CompressionAccept 456 case "auto", "s2_auto": 457 var rtts []time.Duration 458 if len(c.RTTThresholds) == 0 { 459 rtts = defaultCompressionS2AutoRTTThresholds 460 } else { 461 for _, n := range c.RTTThresholds { 462 // Do not error on negative, but simply set to 0 463 if n < 0 { 464 n = 0 465 } 466 // Make sure they are properly ordered. However, it is possible 467 // to have a "0" anywhere in the list to indicate that this 468 // compression level should not be used. 
469 if l := len(rtts); l > 0 && n != 0 { 470 for _, v := range rtts { 471 if n < v { 472 return fmt.Errorf("RTT threshold values %v should be in ascending order", c.RTTThresholds) 473 } 474 } 475 } 476 rtts = append(rtts, n) 477 } 478 if len(rtts) > 0 { 479 // Trim 0 that are at the end. 480 stop := -1 481 for i := len(rtts) - 1; i >= 0; i-- { 482 if rtts[i] != 0 { 483 stop = i 484 break 485 } 486 } 487 rtts = rtts[:stop+1] 488 } 489 if len(rtts) > 4 { 490 // There should be at most values for "uncompressed", "fast", 491 // "better" and "best" (when some 0 are present). 492 return fmt.Errorf("compression mode %q should have no more than 4 RTT thresholds: %v", c.Mode, c.RTTThresholds) 493 } else if len(rtts) == 0 { 494 // But there should be at least 1 if the user provided the slice. 495 // We would be here only if it was provided by say with values 496 // being a single or all zeros. 497 return fmt.Errorf("compression mode %q requires at least one RTT threshold", c.Mode) 498 } 499 } 500 c.Mode = CompressionS2Auto 501 c.RTTThresholds = rtts 502 case "fast", "s2_fast": 503 c.Mode = CompressionS2Fast 504 case "better", "s2_better": 505 c.Mode = CompressionS2Better 506 case "best", "s2_best": 507 c.Mode = CompressionS2Best 508 default: 509 return fmt.Errorf("unsupported compression mode %q", c.Mode) 510 } 511 return nil 512 } 513 514 // Returns `true` if the compression mode `m` indicates that the server 515 // will negotiate compression with the remote server, `false` otherwise. 516 // Note that the provided compression mode is assumed to have been 517 // normalized and validated. 518 func needsCompression(m string) bool { 519 return m != _EMPTY_ && m != CompressionOff && m != CompressionNotSupported 520 } 521 522 // Compression is asymmetric, meaning that one side can have a different 523 // compression level than the other. 
However, we need to check for cases 524 // when this server `scm` or the remote `rcm` do not support compression 525 // (say older server, or test to make it behave as it is not), or have 526 // the compression off. 527 // Note that `scm` is assumed to not be "off" or "not supported". 528 func selectCompressionMode(scm, rcm string) (mode string, err error) { 529 if rcm == CompressionNotSupported || rcm == _EMPTY_ { 530 return CompressionNotSupported, nil 531 } 532 switch rcm { 533 case CompressionOff: 534 // If the remote explicitly disables compression, then we won't 535 // use compression. 536 return CompressionOff, nil 537 case CompressionAccept: 538 // If the remote is ok with compression (but is not initiating it), 539 // and if we too are in this mode, then it means no compression. 540 if scm == CompressionAccept { 541 return CompressionOff, nil 542 } 543 // Otherwise use our compression mode. 544 return scm, nil 545 case CompressionS2Auto, CompressionS2Uncompressed, CompressionS2Fast, CompressionS2Better, CompressionS2Best: 546 // This case is here to make sure that if we don't recognize a 547 // compression setting, we error out. 548 if scm == CompressionAccept { 549 // If our compression mode is "accept", then we will use the remote 550 // compression mode, except if it is "auto", in which case we will 551 // default to "fast". This is not a configuration (auto in one 552 // side and accept in the other) that would be recommended. 553 if rcm == CompressionS2Auto { 554 return CompressionS2Fast, nil 555 } 556 // Use their compression mode. 557 return rcm, nil 558 } 559 // Otherwise use our compression mode. 560 return scm, nil 561 default: 562 return _EMPTY_, fmt.Errorf("unsupported route compression mode %q", rcm) 563 } 564 } 565 566 // If the configured compression mode is "auto" then will return that, 567 // otherwise will return the given `cm` compression mode. 
568 func compressionModeForInfoProtocol(co *CompressionOpts, cm string) string { 569 if co.Mode == CompressionS2Auto { 570 return CompressionS2Auto 571 } 572 return cm 573 } 574 575 // Given a connection RTT and a list of thresholds durations, this 576 // function will return an S2 compression level such as "uncompressed", 577 // "fast", "better" or "best". For instance, with the following slice: 578 // [5ms, 10ms, 15ms, 20ms], a RTT of up to 5ms will result 579 // in the compression level "uncompressed", ]5ms..10ms] will result in 580 // "fast" compression, etc.. 581 // However, the 0 value allows for disabling of some compression levels. 582 // For instance, the following slice: [0, 0, 20, 30] means that a RTT of 583 // [0..20ms] would result in the "better" compression - effectively disabling 584 // the use of "uncompressed" and "fast", then anything above 20ms would 585 // result in the use of "best" level (the 30 in the list has no effect 586 // and the list could have been simplified to [0, 0, 20]). 587 func selectS2AutoModeBasedOnRTT(rtt time.Duration, rttThresholds []time.Duration) string { 588 var idx int 589 var found bool 590 for i, d := range rttThresholds { 591 if rtt <= d { 592 idx = i 593 found = true 594 break 595 } 596 } 597 if !found { 598 // If we did not find but we have all levels, then use "best", 599 // otherwise use the last one in array. 600 if l := len(rttThresholds); l >= 3 { 601 idx = 3 602 } else { 603 idx = l - 1 604 } 605 } 606 switch idx { 607 case 0: 608 return CompressionS2Uncompressed 609 case 1: 610 return CompressionS2Fast 611 case 2: 612 return CompressionS2Better 613 } 614 return CompressionS2Best 615 } 616 617 // Returns an array of s2 WriterOption based on the route compression mode. 618 // So far we return a single option, but this way we can call s2.NewWriter() 619 // with a nil []s2.WriterOption, but not with a nil s2.WriterOption, so 620 // this is more versatile. 
621 func s2WriterOptions(cm string) []s2.WriterOption { 622 _opts := [2]s2.WriterOption{} 623 opts := append( 624 _opts[:0], 625 s2.WriterConcurrency(1), // Stop asynchronous flushing in separate goroutines 626 ) 627 switch cm { 628 case CompressionS2Uncompressed: 629 return append(opts, s2.WriterUncompressed()) 630 case CompressionS2Best: 631 return append(opts, s2.WriterBestCompression()) 632 case CompressionS2Better: 633 return append(opts, s2.WriterBetterCompression()) 634 default: 635 return nil 636 } 637 } 638 639 // New will setup a new server struct after parsing the options. 640 // DEPRECATED: Use NewServer(opts) 641 func New(opts *Options) *Server { 642 s, _ := NewServer(opts) 643 return s 644 } 645 646 // NewServer will setup a new server struct after parsing the options. 647 // Could return an error if options can not be validated. 648 func NewServer(opts *Options) (*Server, error) { 649 setBaselineOptions(opts) 650 651 // Process TLS options, including whether we require client certificates. 652 tlsReq := opts.TLSConfig != nil 653 verify := (tlsReq && opts.TLSConfig.ClientAuth == tls.RequireAndVerifyClientCert) 654 655 // Create our server's nkey identity. 656 kp, _ := nkeys.CreateServer() 657 pub, _ := kp.PublicKey() 658 659 // Create an xkey for encrypting messages from this server. 660 xkp, _ := nkeys.CreateCurveKeys() 661 xpub, _ := xkp.PublicKey() 662 663 serverName := pub 664 if opts.ServerName != _EMPTY_ { 665 serverName = opts.ServerName 666 } 667 668 httpBasePath := normalizeBasePath(opts.HTTPBasePath) 669 670 // Validate some options. This is here because we cannot assume that 671 // server will always be started with configuration parsing (that could 672 // report issues). Its options can be (incorrectly) set by hand when 673 // server is embedded. If there is an error, return nil. 
674 if err := validateOptions(opts); err != nil { 675 return nil, err 676 } 677 678 info := Info{ 679 ID: pub, 680 XKey: xpub, 681 Version: VERSION, 682 Proto: PROTO, 683 GitCommit: gitCommit, 684 GoVersion: runtime.Version(), 685 Name: serverName, 686 Host: opts.Host, 687 Port: opts.Port, 688 AuthRequired: false, 689 TLSRequired: tlsReq && !opts.AllowNonTLS, 690 TLSVerify: verify, 691 MaxPayload: opts.MaxPayload, 692 JetStream: opts.JetStream, 693 Headers: !opts.NoHeaderSupport, 694 Cluster: opts.Cluster.Name, 695 Domain: opts.JetStreamDomain, 696 } 697 698 if tlsReq && !info.TLSRequired { 699 info.TLSAvailable = true 700 } 701 702 now := time.Now() 703 704 s := &Server{ 705 kp: kp, 706 xkp: xkp, 707 xpub: xpub, 708 configFile: opts.ConfigFile, 709 info: info, 710 opts: opts, 711 done: make(chan bool, 1), 712 start: now, 713 configTime: now, 714 gwLeafSubs: NewSublistWithCache(), 715 httpBasePath: httpBasePath, 716 eventIds: nuid.New(), 717 routesToSelf: make(map[string]struct{}), 718 httpReqStats: make(map[string]uint64), // Used to track HTTP requests 719 rateLimitLoggingCh: make(chan time.Duration, 1), 720 leafNodeEnabled: opts.LeafNode.Port != 0 || len(opts.LeafNode.Remotes) > 0, 721 syncOutSem: make(chan struct{}, maxConcurrentSyncRequests), 722 network: opts.NetworkIntercept, 723 } 724 if s.network == nil { 725 s.network = natsNetworkIntercept{} 726 } 727 728 // Fill up the maximum in flight syncRequests for this server. 729 // Used in JetStream catchup semantics. 730 for i := 0; i < maxConcurrentSyncRequests; i++ { 731 s.syncOutSem <- struct{}{} 732 } 733 734 if opts.TLSRateLimit > 0 { 735 s.connRateCounter = newRateCounter(opts.tlsConfigOpts.RateLimit) 736 } 737 738 // Trusted root operator keys. 739 if !s.processTrustedKeys() { 740 return nil, fmt.Errorf("Error processing trusted operator keys") 741 } 742 743 // If we have solicited leafnodes but no clustering and no clustername. 744 // However we may need a stable clustername so use the server name. 
745 if len(opts.LeafNode.Remotes) > 0 && opts.Cluster.Port == 0 && opts.Cluster.Name == _EMPTY_ { 746 opts.Cluster.Name = opts.ServerName 747 } 748 749 if opts.Cluster.Name != _EMPTY_ { 750 // Also place into mapping cn with cnMu lock. 751 s.cnMu.Lock() 752 s.cn = opts.Cluster.Name 753 s.cnMu.Unlock() 754 } 755 756 s.mu.Lock() 757 defer s.mu.Unlock() 758 759 // Place ourselves in the JetStream nodeInfo if needed. 760 if opts.JetStream { 761 ourNode := getHash(serverName) 762 s.nodeToInfo.Store(ourNode, nodeInfo{ 763 serverName, 764 VERSION, 765 opts.Cluster.Name, 766 opts.JetStreamDomain, 767 info.ID, 768 opts.Tags, 769 &JetStreamConfig{MaxMemory: opts.JetStreamMaxMemory, MaxStore: opts.JetStreamMaxStore, CompressOK: true}, 770 nil, 771 false, true, true, 772 }) 773 } 774 775 s.routeResolver = opts.Cluster.resolver 776 if s.routeResolver == nil { 777 s.routeResolver = net.DefaultResolver 778 } 779 780 // Used internally for quick look-ups. 781 s.clientConnectURLsMap = make(refCountedUrlSet) 782 s.websocket.connectURLsMap = make(refCountedUrlSet) 783 s.leafURLsMap = make(refCountedUrlSet) 784 785 // Ensure that non-exported options (used in tests) are properly set. 786 s.setLeafNodeNonExportedOptions() 787 788 // Setup OCSP Stapling and OCSP Peer. This will abort server from starting if there 789 // are no valid staples and OCSP Stapling policy is set to Always or MustStaple. 790 if err := s.enableOCSP(); err != nil { 791 return nil, err 792 } 793 794 // Call this even if there is no gateway defined. It will 795 // initialize the structure so we don't have to check for 796 // it to be nil or not in various places in the code. 797 if err := s.newGateway(opts); err != nil { 798 return nil, err 799 } 800 801 // If we have a cluster definition but do not have a cluster name, create one. 
802 if opts.Cluster.Port != 0 && opts.Cluster.Name == _EMPTY_ { 803 s.info.Cluster = nuid.Next() 804 } else if opts.Cluster.Name != _EMPTY_ { 805 // Likewise here if we have a cluster name set. 806 s.info.Cluster = opts.Cluster.Name 807 } 808 809 // This is normally done in the AcceptLoop, once the 810 // listener has been created (possibly with random port), 811 // but since some tests may expect the INFO to be properly 812 // set after New(), let's do it now. 813 s.setInfoHostPort() 814 815 // For tracking clients 816 s.clients = make(map[uint64]*client) 817 818 // For tracking closed clients. 819 s.closed = newClosedRingBuffer(opts.MaxClosedClients) 820 821 // For tracking connections that are not yet registered 822 // in s.routes, but for which readLoop has started. 823 s.grTmpClients = make(map[uint64]*client) 824 825 // For tracking routes and their remote ids 826 s.initRouteStructures(opts) 827 828 // For tracking leaf nodes. 829 s.leafs = make(map[uint64]*client) 830 831 // Used to kick out all go routines possibly waiting on server 832 // to shutdown. 833 s.quitCh = make(chan struct{}) 834 835 // Closed when startup is complete. ReadyForConnections() will block on 836 // this before checking the presence of listening sockets. 837 s.startupComplete = make(chan struct{}) 838 839 // Closed when Shutdown() is complete. Allows WaitForShutdown() to block 840 // waiting for complete shutdown. 841 s.shutdownComplete = make(chan struct{}) 842 843 // Check for configured account resolvers. 844 if err := s.configureResolver(); err != nil { 845 return nil, err 846 } 847 // If there is an URL account resolver, do basic test to see if anyone is home. 
	if ar := opts.AccountResolver; ar != nil {
		if ur, ok := ar.(*URLAccResolver); ok {
			if _, err := ur.Fetch(_EMPTY_); err != nil {
				return nil, err
			}
		}
	}
	// For other resolver:
	// In operator mode, when the account resolver depends on an external system and
	// the system account can't be fetched, inject a temporary one.
	if ar := s.accResolver; len(opts.TrustedOperators) == 1 && ar != nil &&
		opts.SystemAccount != _EMPTY_ && opts.SystemAccount != DEFAULT_SYSTEM_ACCOUNT {
		if _, ok := ar.(*MemAccResolver); !ok {
			s.mu.Unlock()
			var a *Account
			// perform direct lookup to avoid warning trace
			if _, err := fetchAccount(ar, opts.SystemAccount); err == nil {
				a, _ = s.lookupAccount(opts.SystemAccount)
			}
			s.mu.Lock()
			if a == nil {
				sac := NewAccount(opts.SystemAccount)
				sac.Issuer = opts.TrustedOperators[0].Issuer
				sac.signingKeys = map[string]jwt.Scope{}
				sac.signingKeys[opts.SystemAccount] = nil
				s.registerAccountNoLock(sac)
			}
		}
	}

	// For tracking accounts
	if _, err := s.configureAccounts(false); err != nil {
		return nil, err
	}

	// Used to setup Authorization.
	s.configureAuthorization()

	// Start signal handler
	s.handleSignals()

	return s, nil
}

// Initializes route structures based on pooling and/or per-account routes.
// The pool size defaults to 1 when opts.Cluster.PoolSize is not positive.
//
// Server lock is held on entry
func (s *Server) initRouteStructures(opts *Options) {
	s.routes = make(map[string][]*client)
	if ps := opts.Cluster.PoolSize; ps > 0 {
		s.routesPoolSize = ps
	} else {
		s.routesPoolSize = 1
	}
	// If we have per-account routes, we create accRoutes and initialize it
	// with an empty map per pinned account. The presence of an account as
	// the key will allow us to know if a given account is supposed to have
	// dedicated routes.
	if l := len(opts.Cluster.PinnedAccounts); l > 0 {
		s.accRoutes = make(map[string]map[string]*client, l)
		for _, acc := range opts.Cluster.PinnedAccounts {
			s.accRoutes[acc] = make(map[string]*client)
		}
	}
}

// logRejectedTLSConns wakes up every second and, when the connection rate
// counter reports blocked connections, logs how many were rejected.
// It returns when s.quitCh becomes readable and signals s.grWG on exit.
func (s *Server) logRejectedTLSConns() {
	defer s.grWG.Done()
	t := time.NewTicker(time.Second)
	defer t.Stop()
	for {
		select {
		case <-s.quitCh:
			return
		case <-t.C:
			blocked := s.connRateCounter.countBlocked()
			if blocked > 0 {
				s.Warnf("Rejected %d connections due to TLS rate limiting", blocked)
			}
		}
	}
}

// ClusterName returns our cluster name which could be dynamic.
// The value is read from s.info under the server read lock.
func (s *Server) ClusterName() string {
	s.mu.RLock()
	cn := s.info.Cluster
	s.mu.RUnlock()
	return cn
}

// Grabs cluster name with cluster name specific lock.
func (s *Server) cachedClusterName() string {
	s.cnMu.RLock()
	cn := s.cn
	s.cnMu.RUnlock()
	return cn
}

// setClusterName will update the cluster name for this server.
// It updates s.info and s.routeInfo under the server lock, then the cached
// copy under cnMu, closes the collected leaf node connections and, if the
// name changed while a system account is set, signals s.sys.resetCh.
func (s *Server) setClusterName(name string) {
	s.mu.Lock()
	var resetCh chan struct{}
	if s.sys != nil && s.info.Cluster != name {
		// can't hold the lock as go routine reading it may be waiting for lock as well
		resetCh = s.sys.resetCh
	}
	s.info.Cluster = name
	s.routeInfo.Cluster = name

	// Need to close solicited leaf nodes. The close has to be done outside of the server lock.
	var leafs []*client
	for _, c := range s.leafs {
		c.mu.Lock()
		if c.leaf != nil && c.leaf.remote != nil {
			leafs = append(leafs, c)
		}
		c.mu.Unlock()
	}
	s.mu.Unlock()

	// Also place into mapping cn with cnMu lock.
	s.cnMu.Lock()
	s.cn = name
	s.cnMu.Unlock()

	for _, l := range leafs {
		l.closeConnection(ClusterNameConflict)
	}
	if resetCh != nil {
		// Sent after all locks have been released (see note above).
		resetCh <- struct{}{}
	}
	s.Noticef("Cluster name updated to %s", name)
}

// Return whether the cluster name is dynamic.
// That is the case when no cluster name is set in the options.
func (s *Server) isClusterNameDynamic() bool {
	return s.getOpts().Cluster.Name == _EMPTY_
}

// Returns our configured serverName.
func (s *Server) serverName() string {
	return s.getOpts().ServerName
}

// ClientURL returns the URL used to connect clients. Helpful in testing
// when we designate a random client port (-1).
// The scheme is "tls://" when a TLS config is present, "nats://" otherwise.
func (s *Server) ClientURL() string {
	// FIXME(dlc) - should we add in user and pass if defined single?
	opts := s.getOpts()
	scheme := "nats://"
	if opts.TLSConfig != nil {
		scheme = "tls://"
	}
	return fmt.Sprintf("%s%s:%d", scheme, opts.Host, opts.Port)
}

// validateCluster checks the cluster section of the options: compression
// mode (when set), pinned certificates, agreement between cluster and
// gateway names, and the pinned accounts list (no negative pool size, no
// duplicates). Note that it may modify `o`: the compression option is
// passed by pointer to its validator, and an empty cluster name is set to
// the gateway name.
func validateCluster(o *Options) error {
	if o.Cluster.Compression.Mode != _EMPTY_ {
		if err := validateAndNormalizeCompressionOption(&o.Cluster.Compression, CompressionS2Fast); err != nil {
			return err
		}
	}
	if err := validatePinnedCerts(o.Cluster.TLSPinnedCerts); err != nil {
		return fmt.Errorf("cluster: %v", err)
	}
	// Check that cluster name if defined matches any gateway name.
	if o.Gateway.Name != "" && o.Gateway.Name != o.Cluster.Name {
		if o.Cluster.Name != "" {
			return ErrClusterNameConfigConflict
		}
		// Set this here so we do not consider it dynamic.
		o.Cluster.Name = o.Gateway.Name
	}
	if l := len(o.Cluster.PinnedAccounts); l > 0 {
		if o.Cluster.PoolSize < 0 {
			return fmt.Errorf("pool_size cannot be negative if pinned accounts are specified")
		}
		// Set of names seen so far, used to detect duplicates.
		m := make(map[string]struct{}, l)
		for _, a := range o.Cluster.PinnedAccounts {
			if _, exists := m[a]; exists {
				return fmt.Errorf("found duplicate account name %q in pinned accounts list %q", a, o.Cluster.PinnedAccounts)
			}
			m[a] = struct{}{}
		}
	}
	return nil
}

// validatePinnedCerts verifies that every key of the pinned set, once
// lower-cased, consists of exactly 64 hexadecimal characters.
// It returns an error naming the first key that does not.
func validatePinnedCerts(pinned PinnedCertSet) error {
	re := regexp.MustCompile("^[a-f0-9]{64}$")
	for certId := range pinned {
		// Keys are compared case-insensitively: lower-case before matching.
		entry := strings.ToLower(certId)
		if !re.MatchString(entry) {
			return fmt.Errorf("error parsing 'pinned_certs' key %s does not look like lower case hex-encoded sha256 of DER encoded SubjectPublicKeyInfo", entry)
		}
	}
	return nil
}

// validateOptions runs the individual option validators in order and
// returns the first error encountered, or nil if all of them pass.
func validateOptions(o *Options) error {
	if o.LameDuckDuration > 0 && o.LameDuckGracePeriod >= o.LameDuckDuration {
		return fmt.Errorf("lame duck grace period (%v) should be strictly lower than lame duck duration (%v)",
			o.LameDuckGracePeriod, o.LameDuckDuration)
	}
	if int64(o.MaxPayload) > o.MaxPending {
		return fmt.Errorf("max_payload (%v) cannot be higher than max_pending (%v)",
			o.MaxPayload, o.MaxPending)
	}
	// Check that the trust configuration is correct.
	if err := validateTrustedOperators(o); err != nil {
		return err
	}
	// Check on leaf nodes which will require a system
	// account when gateways are also configured.
	if err := validateLeafNode(o); err != nil {
		return err
	}
	// Check that authentication is properly configured.
	if err := validateAuth(o); err != nil {
		return err
	}
	// Check that gateway is properly configured. Returns no error
	// if there is no gateway defined.
	if err := validateGatewayOptions(o); err != nil {
		return err
	}
	// Check that cluster name if defined matches any gateway name.
	if err := validateCluster(o); err != nil {
		return err
	}
	if err := validateMQTTOptions(o); err != nil {
		return err
	}
	if err := validateJetStreamOptions(o); err != nil {
		return err
	}
	// Finally check websocket options.
	return validateWebsocketOptions(o)
}

// getOpts returns the current options pointer, read under the options read lock.
func (s *Server) getOpts() *Options {
	s.optsMu.RLock()
	opts := s.opts
	s.optsMu.RUnlock()
	return opts
}

// setOpts replaces the current options pointer under the options lock.
func (s *Server) setOpts(opts *Options) {
	s.optsMu.Lock()
	s.opts = opts
	s.optsMu.Unlock()
}

// globalAccount returns the global account, read under the server read lock.
func (s *Server) globalAccount() *Account {
	s.mu.RLock()
	gacc := s.gacc
	s.mu.RUnlock()
	return gacc
}

// Used to setup or update Accounts.
// Returns a map that indicates which accounts have had their stream imports
// changed (in case of an update in configuration reload).
// Lock is held upon entry, but will be released/reacquired in this function.
func (s *Server) configureAccounts(reloading bool) (map[string]struct{}, error) {
	// Names of accounts whose stream imports have changed.
	awcsti := make(map[string]struct{})

	// Create the global account.
	if s.gacc == nil {
		s.gacc = NewAccount(globalAccountName)
		s.registerAccountNoLock(s.gacc)
	}

	opts := s.getOpts()

	// We need to track service imports since we can not swap them out (unsub and re-sub)
	// until the proper server struct accounts have been swapped in properly. Doing it in
	// place could lead to data loss or server panic since account under new si has no real
	// account and hence no sublist, so will panic on inbound message.
	siMap := make(map[*Account][][]byte)

	// Check opts and walk through them. We need to copy them here
	// so that we do not keep a real one sitting in the options.
	for _, acc := range opts.Accounts {
		var a *Account
		create := true
		// For the global account, we want to skip the reload process
		// and fall back into the "create" case which will in that
		// case really be just an update (shallowCopy will make sure
		// that mappings are copied over).
		if reloading && acc.Name != globalAccountName {
			if ai, ok := s.accounts.Load(acc.Name); ok {
				a = ai.(*Account)
				a.mu.Lock()
				// Before updating the account, check if stream imports have changed.
				if !a.checkStreamImportsEqual(acc) {
					awcsti[acc.Name] = struct{}{}
				}
				// Collect the sids for the service imports since we are going to
				// replace with new ones.
				var sids [][]byte
				for _, si := range a.imports.services {
					if si.sid != nil {
						sids = append(sids, si.sid)
					}
				}
				// Setup to process later if needed.
				if len(sids) > 0 || len(acc.imports.services) > 0 {
					siMap[a] = sids
				}

				// Now reset all export/imports fields since they are going to be
				// filled in shallowCopy()
				a.imports.streams, a.imports.services = nil, nil
				a.exports.streams, a.exports.services = nil, nil
				// We call shallowCopy from the account `acc` (the one in Options)
				// and pass `a` (our existing account) to get it updated.
				acc.shallowCopy(a)
				a.mu.Unlock()
				create = false
			}
		}
		// Track old mappings if global account.
		var oldGMappings []*mapping
		if create {
			if acc.Name == globalAccountName {
				a = s.gacc
				a.mu.Lock()
				oldGMappings = append(oldGMappings, a.mappings...)
				a.mu.Unlock()
			} else {
				a = NewAccount(acc.Name)
			}
			// Locking matters in the case of an update of the global account
			a.mu.Lock()
			acc.shallowCopy(a)
			a.mu.Unlock()
			// Will be a no-op in case of the global account since it is already registered.
			s.registerAccountNoLock(a)
		}

		// The `acc` account is stored in options, not in the server, and these can be cleared.
		acc.sl, acc.clients, acc.mappings = nil, nil, nil

		// Check here if we have been reloaded and we have a global account with mappings that may have changed.
		// If we have leafnodes they need to be updated.
		if reloading && a == s.gacc {
			a.mu.Lock()
			// New mappings keyed by source subject; only populated when the
			// account has mappings and leaf nodes.
			mappings := make(map[string]*mapping)
			if len(a.mappings) > 0 && a.nleafs > 0 {
				for _, em := range a.mappings {
					mappings[em.src] = em
				}
			}
			a.mu.Unlock()
			if len(mappings) > 0 || len(oldGMappings) > 0 {
				a.lmu.RLock()
				for _, lc := range a.lleafs {
					for _, em := range mappings {
						lc.forceAddToSmap(em.src)
					}
					// Remove any old ones if needed.
					for _, em := range oldGMappings {
						// Only remove if not in the new ones.
						if _, ok := mappings[em.src]; !ok {
							lc.forceRemoveFromSmap(em.src)
						}
					}
				}
				a.lmu.RUnlock()
			}
		}

		// If we see an account defined using $SYS we will make sure that is set as system account.
		if acc.Name == DEFAULT_SYSTEM_ACCOUNT && opts.SystemAccount == _EMPTY_ {
			opts.SystemAccount = DEFAULT_SYSTEM_ACCOUNT
		}
	}

	// Now that we have this we need to remap any referenced accounts in
	// import or export maps to the new ones.
	// An approved entry whose account is not registered is replaced with nil.
	swapApproved := func(ea *exportAuth) {
		for sub, a := range ea.approved {
			var acc *Account
			if v, ok := s.accounts.Load(a.Name); ok {
				acc = v.(*Account)
			}
			ea.approved[sub] = acc
		}
	}
	var numAccounts int
	s.accounts.Range(func(k, v interface{}) bool {
		numAccounts++
		acc := v.(*Account)
		acc.mu.Lock()
		// Exports
		for _, se := range acc.exports.streams {
			if se != nil {
				swapApproved(&se.exportAuth)
			}
		}
		for _, se := range acc.exports.services {
			if se != nil {
				// Swap over the bound account for service exports.
				if se.acc != nil {
					if v, ok := s.accounts.Load(se.acc.Name); ok {
						se.acc = v.(*Account)
					}
				}
				swapApproved(&se.exportAuth)
			}
		}
		// Imports
		for _, si := range acc.imports.streams {
			if v, ok := s.accounts.Load(si.acc.Name); ok {
				si.acc = v.(*Account)
			}
		}
		for _, si := range acc.imports.services {
			if v, ok := s.accounts.Load(si.acc.Name); ok {
				si.acc = v.(*Account)

				// It is possible to allow for latency tracking inside your
				// own account, so lock only when not the same account.
				if si.acc == acc {
					si.se = si.acc.getServiceExport(si.to)
					continue
				}
				si.acc.mu.RLock()
				si.se = si.acc.getServiceExport(si.to)
				si.acc.mu.RUnlock()
			}
		}
		// Make sure the subs are running, but only if not reloading.
		if len(acc.imports.services) > 0 && acc.ic == nil && !reloading {
			acc.ic = s.createInternalAccountClient()
			acc.ic.acc = acc
			// Need to release locks to invoke this function.
			acc.mu.Unlock()
			s.mu.Unlock()
			acc.addAllServiceImportSubs()
			s.mu.Lock()
			acc.mu.Lock()
		}
		acc.updated = time.Now()
		acc.mu.Unlock()
		return true
	})

	// Check if we need to process service imports pending from above.
	// This processing needs to be after we swap in the real accounts above.
	for acc, sids := range siMap {
		c := acc.ic
		for _, sid := range sids {
			c.processUnsub(sid)
		}
		acc.addAllServiceImportSubs()
		// registerSystemImports is invoked without the server lock.
		s.mu.Unlock()
		s.registerSystemImports(acc)
		s.mu.Lock()
	}

	// Set the system account if it was configured.
	// Otherwise create a default one.
	if opts.SystemAccount != _EMPTY_ {
		// Lock may be acquired in lookupAccount, so release to call lookupAccount.
		s.mu.Unlock()
		acc, err := s.lookupAccount(opts.SystemAccount)
		s.mu.Lock()
		if err == nil && s.sys != nil && acc != s.sys.account {
			// sys.account.clients (including internal client)/respmap/etc... are transferred separately
			s.sys.account = acc
		}
		if err != nil {
			return awcsti, fmt.Errorf("error resolving system account: %v", err)
		}

		// If we have defined a system account here check to see if its just us and the $G account.
		// We would do this to add user/pass to the system account. If this is the case add in
		// no-auth-user for $G.
		// Only do this if non-operator mode and we did not have an authorization block defined.
		if len(opts.TrustedOperators) == 0 && numAccounts == 2 && opts.NoAuthUser == _EMPTY_ && !opts.authBlockDefined {
			// If we come here from config reload, let's not recreate the fake user name otherwise
			// it will cause currently clients to be disconnected.
			uname := s.sysAccOnlyNoAuthUser
			if uname == _EMPTY_ {
				// Create a unique name so we do not collide.
				var b [8]byte
				rn := rand.Int63()
				for i, l := 0, rn; i < len(b); i++ {
					b[i] = digits[l%base]
					l /= base
				}
				uname = fmt.Sprintf("nats-%s", b[:])
				s.sysAccOnlyNoAuthUser = uname
			}
			// The password is the user name without its first six characters.
			opts.Users = append(opts.Users, &User{Username: uname, Password: uname[6:], Account: s.gacc})
			opts.NoAuthUser = uname
		}
	}

	// Add any required exports from system account.
	if s.sys != nil {
		// addSystemAccountExports is invoked without the server lock.
		s.mu.Unlock()
		s.addSystemAccountExports(s.sys.account)
		s.mu.Lock()
	}

	return awcsti, nil
}

// Setup the account resolver. For memory resolver, make sure the JWTs are
// properly formed but do not enforce expiration etc.
// Lock is held on entry, but may be released/reacquired during this call.
func (s *Server) configureResolver() error {
	opts := s.getOpts()
	s.accResolver = opts.AccountResolver
	if opts.AccountResolver != nil {
		// For URL resolver, set the TLSConfig if specified.
		if opts.AccountResolverTLSConfig != nil {
			if ar, ok := opts.AccountResolver.(*URLAccResolver); ok {
				if t, ok := ar.c.Transport.(*http.Transport); ok {
					t.CloseIdleConnections()
					t.TLSClientConfig = opts.AccountResolverTLSConfig.Clone()
				}
			}
		}
		if len(opts.resolverPreloads) > 0 {
			// Lock ordering is account resolver -> server, so we need to release
			// the lock and reacquire it when done with account resolver's calls.
			ar := s.accResolver
			s.mu.Unlock()
			// Reacquired on every return path below, so the caller gets the lock back.
			defer s.mu.Lock()
			if ar.IsReadOnly() {
				return fmt.Errorf("resolver preloads only available for writeable resolver types MEM/DIR/CACHE_DIR")
			}
			for k, v := range opts.resolverPreloads {
				// Only decode to verify the JWT is well formed; the claims are not used here.
				_, err := jwt.DecodeAccountClaims(v)
				if err != nil {
					return fmt.Errorf("preload account error for %q: %v", k, err)
				}
				// NOTE(review): the result of Store is not checked here.
				ar.Store(k, v)
			}
		}
	}
	return nil
}

// This will check preloads for validation issues.
1397 func (s *Server) checkResolvePreloads() { 1398 opts := s.getOpts() 1399 // We can just check the read-only opts versions here, that way we do not need 1400 // to grab server lock or access s.accResolver. 1401 for k, v := range opts.resolverPreloads { 1402 claims, err := jwt.DecodeAccountClaims(v) 1403 if err != nil { 1404 s.Errorf("Preloaded account [%s] not valid", k) 1405 continue 1406 } 1407 // Check if it is expired. 1408 vr := jwt.CreateValidationResults() 1409 claims.Validate(vr) 1410 if vr.IsBlocking(true) { 1411 s.Warnf("Account [%s] has validation issues:", k) 1412 for _, v := range vr.Issues { 1413 s.Warnf(" - %s", v.Description) 1414 } 1415 } 1416 } 1417 } 1418 1419 // Determines if we are in pre NATS 2.0 setup with no accounts. 1420 func (s *Server) globalAccountOnly() bool { 1421 var hasOthers bool 1422 1423 if s.trustedKeys != nil { 1424 return false 1425 } 1426 1427 s.mu.RLock() 1428 s.accounts.Range(func(k, v interface{}) bool { 1429 acc := v.(*Account) 1430 // Ignore global and system 1431 if acc == s.gacc || (s.sys != nil && acc == s.sys.account) { 1432 return true 1433 } 1434 hasOthers = true 1435 return false 1436 }) 1437 s.mu.RUnlock() 1438 1439 return !hasOthers 1440 } 1441 1442 // Determines if this server is in standalone mode, meaning no routes or gateways. 1443 func (s *Server) standAloneMode() bool { 1444 opts := s.getOpts() 1445 return opts.Cluster.Port == 0 && opts.Gateway.Port == 0 1446 } 1447 1448 func (s *Server) configuredRoutes() int { 1449 return len(s.getOpts().Routes) 1450 } 1451 1452 // activePeers is used in bootstrapping raft groups like the JetStream meta controller. 1453 func (s *Server) ActivePeers() (peers []string) { 1454 s.nodeToInfo.Range(func(k, v interface{}) bool { 1455 si := v.(nodeInfo) 1456 if !si.offline { 1457 peers = append(peers, k.(string)) 1458 } 1459 return true 1460 }) 1461 return peers 1462 } 1463 1464 // isTrustedIssuer will check that the issuer is a trusted public key. 
1465 // This is used to make sure an account was signed by a trusted operator. 1466 func (s *Server) isTrustedIssuer(issuer string) bool { 1467 s.mu.RLock() 1468 defer s.mu.RUnlock() 1469 // If we are not running in trusted mode and there is no issuer, that is ok. 1470 if s.trustedKeys == nil && issuer == _EMPTY_ { 1471 return true 1472 } 1473 for _, tk := range s.trustedKeys { 1474 if tk == issuer { 1475 return true 1476 } 1477 } 1478 return false 1479 } 1480 1481 // processTrustedKeys will process binary stamped and 1482 // options-based trusted nkeys. Returns success. 1483 func (s *Server) processTrustedKeys() bool { 1484 s.strictSigningKeyUsage = map[string]struct{}{} 1485 opts := s.getOpts() 1486 if trustedKeys != _EMPTY_ && !s.initStampedTrustedKeys() { 1487 return false 1488 } else if opts.TrustedKeys != nil { 1489 for _, key := range opts.TrustedKeys { 1490 if !nkeys.IsValidPublicOperatorKey(key) { 1491 return false 1492 } 1493 } 1494 s.trustedKeys = append([]string(nil), opts.TrustedKeys...) 1495 for _, claim := range opts.TrustedOperators { 1496 if !claim.StrictSigningKeyUsage { 1497 continue 1498 } 1499 for _, key := range claim.SigningKeys { 1500 s.strictSigningKeyUsage[key] = struct{}{} 1501 } 1502 } 1503 } 1504 return true 1505 } 1506 1507 // checkTrustedKeyString will check that the string is a valid array 1508 // of public operator nkeys. 1509 func checkTrustedKeyString(keys string) []string { 1510 tks := strings.Fields(keys) 1511 if len(tks) == 0 { 1512 return nil 1513 } 1514 // Walk all the keys and make sure they are valid. 1515 for _, key := range tks { 1516 if !nkeys.IsValidPublicOperatorKey(key) { 1517 return nil 1518 } 1519 } 1520 return tks 1521 } 1522 1523 // initStampedTrustedKeys will check the stamped trusted keys 1524 // and will set the server field 'trustedKeys'. Returns whether 1525 // it succeeded or not. 
1526 func (s *Server) initStampedTrustedKeys() bool { 1527 // Check to see if we have an override in options, which will cause us to fail. 1528 if len(s.getOpts().TrustedKeys) > 0 { 1529 return false 1530 } 1531 tks := checkTrustedKeyString(trustedKeys) 1532 if len(tks) == 0 { 1533 return false 1534 } 1535 s.trustedKeys = tks 1536 return true 1537 } 1538 1539 // PrintAndDie is exported for access in other packages. 1540 func PrintAndDie(msg string) { 1541 fmt.Fprintln(os.Stderr, msg) 1542 os.Exit(1) 1543 } 1544 1545 // PrintServerAndExit will print our version and exit. 1546 func PrintServerAndExit() { 1547 fmt.Printf("nats-server: v%s\n", VERSION) 1548 os.Exit(0) 1549 } 1550 1551 // ProcessCommandLineArgs takes the command line arguments 1552 // validating and setting flags for handling in case any 1553 // sub command was present. 1554 func ProcessCommandLineArgs(cmd *flag.FlagSet) (showVersion bool, showHelp bool, err error) { 1555 if len(cmd.Args()) > 0 { 1556 arg := cmd.Args()[0] 1557 switch strings.ToLower(arg) { 1558 case "version": 1559 return true, false, nil 1560 case "help": 1561 return false, true, nil 1562 default: 1563 return false, false, fmt.Errorf("unrecognized command: %q", arg) 1564 } 1565 } 1566 1567 return false, false, nil 1568 } 1569 1570 // Public version. 1571 func (s *Server) Running() bool { 1572 return s.isRunning() 1573 } 1574 1575 // Protected check on running state 1576 func (s *Server) isRunning() bool { 1577 return s.running.Load() 1578 } 1579 1580 func (s *Server) logPid() error { 1581 pidStr := strconv.Itoa(os.Getpid()) 1582 return os.WriteFile(s.getOpts().PidFile, []byte(pidStr), 0660) 1583 } 1584 1585 // numReservedAccounts will return the number of reserved accounts configured in the server. 1586 // Currently this is 1, one for the global default account. 1587 func (s *Server) numReservedAccounts() int { 1588 return 1 1589 } 1590 1591 // NumActiveAccounts reports number of active accounts on this server. 
func (s *Server) NumActiveAccounts() int32 {
	return atomic.LoadInt32(&s.activeAccounts)
}

// incActiveAccounts() atomically adds one to the active accounts counter.
func (s *Server) incActiveAccounts() {
	atomic.AddInt32(&s.activeAccounts, 1)
}

// decActiveAccounts() atomically subtracts one from the active accounts counter.
func (s *Server) decActiveAccounts() {
	atomic.AddInt32(&s.activeAccounts, -1)
}

// This should be used for testing only. Will be slow since we have to
// range over all accounts in the sync.Map to count.
func (s *Server) numAccounts() int {
	count := 0
	s.mu.RLock()
	s.accounts.Range(func(k, v interface{}) bool {
		count++
		return true
	})
	s.mu.RUnlock()
	return count
}

// NumLoadedAccounts returns the number of loaded accounts.
func (s *Server) NumLoadedAccounts() int {
	return s.numAccounts()
}

// LookupOrRegisterAccount will return the given account if known or create a new entry.
// isNew is true only when the account was created by this call.
func (s *Server) LookupOrRegisterAccount(name string) (account *Account, isNew bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if v, ok := s.accounts.Load(name); ok {
		return v.(*Account), false
	}
	acc := NewAccount(name)
	s.registerAccountNoLock(acc)
	return acc, true
}

// RegisterAccount will register an account. The account must be new
// or this call will fail with ErrAccountExists.
func (s *Server) RegisterAccount(name string) (*Account, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, ok := s.accounts.Load(name); ok {
		return nil, ErrAccountExists
	}
	acc := NewAccount(name)
	s.registerAccountNoLock(acc)
	return acc, nil
}

// SetSystemAccount will set the internal system account.
// If root operators are present it will also check validity.
// An account that is not known locally is fetched through
// fetchAccountClaims and registered before being set.
func (s *Server) SetSystemAccount(accName string) error {
	// Lookup from sync.Map first.
	if v, ok := s.accounts.Load(accName); ok {
		return s.setSystemAccount(v.(*Account))
	}

	// If we are here we do not have local knowledge of this account.
	// Do this one by hand to return more useful error.
	// Note: the local `jwt` below shadows the imported jwt package.
	ac, jwt, err := s.fetchAccountClaims(accName)
	if err != nil {
		return err
	}
	acc := s.buildInternalAccount(ac)
	acc.claimJWT = jwt
	// Due to race, we need to make sure that we are not
	// registering twice.
	// A non-nil result means the account was already registered; in that
	// case we return without calling setSystemAccount.
	if racc := s.registerAccount(acc); racc != nil {
		return nil
	}
	return s.setSystemAccount(acc)
}

// SystemAccount returns the system account if set, nil otherwise.
func (s *Server) SystemAccount() *Account {
	var sacc *Account
	s.mu.RLock()
	if s.sys != nil {
		sacc = s.sys.account
	}
	s.mu.RUnlock()
	return sacc
}

// GlobalAccount returns the global account.
// Default clients will use the global account.
func (s *Server) GlobalAccount() *Account {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.gacc
}

// SetDefaultSystemAccount will create a default system account if one is not present.
// When an account named DEFAULT_SYSTEM_ACCOUNT is already registered this
// is a no-op that returns nil.
func (s *Server) SetDefaultSystemAccount() error {
	if _, isNew := s.LookupOrRegisterAccount(DEFAULT_SYSTEM_ACCOUNT); !isNew {
		return nil
	}
	s.Debugf("Created system account: %q", DEFAULT_SYSTEM_ACCOUNT)
	return s.SetSystemAccount(DEFAULT_SYSTEM_ACCOUNT)
}

// Assign a system account. Should only be called once.
// This sets up a server to send and receive messages from
// inside the server itself.
//
// Returns ErrMissingAccount for a nil account, ErrAccountExpired for an
// expired one, ErrAccountValidation when the issuer is not trusted and
// ErrAccountExists when a system account has already been set.
// The server lock must not be held by the caller: it is acquired here.
func (s *Server) setSystemAccount(acc *Account) error {
	if acc == nil {
		return ErrMissingAccount
	}
	// Don't try to fix this here.
	if acc.IsExpired() {
		return ErrAccountExpired
	}
	// If we are running with trusted keys for an operator
	// make sure we check the account is legit.
	if !s.isTrustedIssuer(acc.Issuer) {
		return ErrAccountValidation
	}

	s.mu.Lock()

	if s.sys != nil {
		s.mu.Unlock()
		return ErrAccountExists
	}

	// This is here in an attempt to quiet the race detector and not have to place
	// locks on fast path for inbound messages and checking service imports.
	acc.mu.Lock()
	if acc.imports.services == nil {
		acc.imports.services = make(map[string]*serviceImport)
	}
	acc.mu.Unlock()

	s.sys = &internal{
		account: acc,
		client:  s.createInternalSystemClient(),
		seq:     1,
		sid:     1,
		servers: make(map[string]*serverUpdate),
		replies: make(map[string]msgHandler),
		sendq:   newIPQueue[*pubMsg](s, "System sendQ"),
		recvq:   newIPQueue[*inSysMsg](s, "System recvQ"),
		resetCh: make(chan struct{}),
		sq:      s.newSendQ(),
		statsz:  eventsHBInterval,
		orphMax: 5 * eventsHBInterval,
		chkOrph: 3 * eventsHBInterval,
	}
	// Accounts for the internalSendLoop goroutine started below.
	s.sys.wg.Add(1)
	s.mu.Unlock()

	// Register with the account.
	s.sys.client.registerWithAccount(acc)

	s.addSystemAccountExports(acc)

	// Start our internal loop to serialize outbound messages.
	// We do our own wg here since we will stop first during shutdown.
	go s.internalSendLoop(&s.sys.wg)

	// Start the internal loop for inbound messages.
	go s.internalReceiveLoop()

	// Start up our general subscriptions
	s.initEventTracking()

	// Track for dead remote servers.
	s.wrapChk(s.startRemoteServerSweepTimer)()

	// Send out statsz updates periodically.
	s.wrapChk(s.startStatszTimer)()

	// If we have existing accounts make sure we enable account tracking.
	s.mu.Lock()
	s.accounts.Range(func(k, v interface{}) bool {
		acc := v.(*Account)
		s.enableAccountTracking(acc)
		return true
	})
	s.mu.Unlock()

	return nil
}

// Creates an internal system client.
1785 func (s *Server) createInternalSystemClient() *client { 1786 return s.createInternalClient(SYSTEM) 1787 } 1788 1789 // Creates an internal jetstream client. 1790 func (s *Server) createInternalJetStreamClient() *client { 1791 return s.createInternalClient(JETSTREAM) 1792 } 1793 1794 // Creates an internal client for Account. 1795 func (s *Server) createInternalAccountClient() *client { 1796 return s.createInternalClient(ACCOUNT) 1797 } 1798 1799 // Internal clients. kind should be SYSTEM or JETSTREAM 1800 func (s *Server) createInternalClient(kind int) *client { 1801 if kind != SYSTEM && kind != JETSTREAM && kind != ACCOUNT { 1802 return nil 1803 } 1804 now := time.Now() 1805 c := &client{srv: s, kind: kind, opts: internalOpts, msubs: -1, mpay: -1, start: now, last: now} 1806 c.initClient() 1807 c.echo = false 1808 c.headers = true 1809 c.flags.set(noReconnect) 1810 return c 1811 } 1812 1813 // Determine if accounts should track subscriptions for 1814 // efficient propagation. 1815 // Lock should be held on entry. 1816 func (s *Server) shouldTrackSubscriptions() bool { 1817 opts := s.getOpts() 1818 return (opts.Cluster.Port != 0 || opts.Gateway.Port != 0) 1819 } 1820 1821 // Invokes registerAccountNoLock under the protection of the server lock. 1822 // That is, server lock is acquired/released in this function. 1823 // See registerAccountNoLock for comment on returned value. 1824 func (s *Server) registerAccount(acc *Account) *Account { 1825 s.mu.Lock() 1826 racc := s.registerAccountNoLock(acc) 1827 s.mu.Unlock() 1828 return racc 1829 } 1830 1831 // Helper to set the sublist based on preferences. 1832 func (s *Server) setAccountSublist(acc *Account) { 1833 if acc != nil && acc.sl == nil { 1834 opts := s.getOpts() 1835 if opts != nil && opts.NoSublistCache { 1836 acc.sl = NewSublistNoCache() 1837 } else { 1838 acc.sl = NewSublistWithCache() 1839 } 1840 } 1841 } 1842 1843 // Registers an account in the server. 
// Due to some locking considerations, we may end-up trying
// to register the same account twice. This function will
// then return the already registered account.
// When the account is newly registered, nil is returned.
// Lock should be held on entry. Note that it is released and
// reacquired near the end of this function.
func (s *Server) registerAccountNoLock(acc *Account) *Account {
	// We are under the server lock. Lookup from map, if present
	// return existing account.
	if a, _ := s.accounts.Load(acc.Name); a != nil {
		s.tmpAccounts.Delete(acc.Name)
		return a.(*Account)
	}
	// Finish account setup and store.
	s.setAccountSublist(acc)

	acc.mu.Lock()
	s.setRouteInfo(acc)
	if acc.clients == nil {
		acc.clients = make(map[*client]struct{})
	}

	// If we are capable of routing we will track subscription
	// information for efficient interest propagation.
	// During config reload, it is possible that account was
	// already created (global account), so use locking and
	// make sure we create only if needed.
	// TODO(dlc)- Double check that we need this for GWs.
	if acc.rm == nil && s.opts != nil && s.shouldTrackSubscriptions() {
		acc.rm = make(map[string]int32)
		acc.lqws = make(map[string]int32)
	}
	acc.srv = s
	acc.updated = time.Now()
	// Capture what is needed below while still holding the account lock.
	accName := acc.Name
	jsEnabled := len(acc.jsLimits) > 0
	acc.mu.Unlock()

	if opts := s.getOpts(); opts != nil && len(opts.JsAccDefaultDomain) > 0 {
		if defDomain, ok := opts.JsAccDefaultDomain[accName]; ok {
			if jsEnabled {
				s.Warnf("Skipping Default Domain %q, set for JetStream enabled account %q", defDomain, accName)
			} else if defDomain != _EMPTY_ {
				for src, dest := range generateJSMappingTable(defDomain) {
					// flip src and dest around so the domain is inserted
					s.Noticef("Adding default domain mapping %q -> %q to account %q %p", dest, src, accName, acc)
					if err := acc.AddMapping(dest, src); err != nil {
						s.Errorf("Error adding JetStream default domain mapping: %v", err)
					}
				}
			}
		}
	}

	s.accounts.Store(acc.Name, acc)
	s.tmpAccounts.Delete(acc.Name)
	s.enableAccountTracking(acc)

	// Can not have server lock here.
	s.mu.Unlock()
	s.registerSystemImports(acc)
	// Starting 2.9.0, we are phasing out the optimistic mode, so change
	// the account to interest-only mode (except if instructed not to do
	// it in some tests).
	if s.gateway.enabled && !gwDoNotForceInterestOnlyMode {
		s.switchAccountToInterestMode(acc.GetName())
	}
	s.mu.Lock()

	// nil signals that `acc` itself was registered (no pre-existing account).
	return nil
}

// Sets the account's routePoolIdx depending on presence or not of
// pooling or per-account routes. Also updates a map used by
// gateway code to retrieve a route based on some route hash.
//
// Both Server and Account lock held on entry.
1919 func (s *Server) setRouteInfo(acc *Account) { 1920 // If there is a dedicated route configured for this account 1921 if _, ok := s.accRoutes[acc.Name]; ok { 1922 // We want the account name to be in the map, but we don't 1923 // need a value (we could store empty string) 1924 s.accRouteByHash.Store(acc.Name, nil) 1925 // Set the route pool index to -1 so that it is easy when 1926 // ranging over accounts to exclude those accounts when 1927 // trying to get accounts for a given pool index. 1928 acc.routePoolIdx = accDedicatedRoute 1929 } else { 1930 // If pool size more than 1, we will compute a hash code and 1931 // use modulo to assign to an index of the pool slice. For 1 1932 // and below, all accounts will be bound to the single connection 1933 // at index 0. 1934 acc.routePoolIdx = s.computeRoutePoolIdx(acc) 1935 if s.routesPoolSize > 1 { 1936 s.accRouteByHash.Store(acc.Name, acc.routePoolIdx) 1937 } 1938 } 1939 } 1940 1941 // Returns a route pool index for this account based on the given pool size. 1942 // Account lock is held on entry (account's name is accessed but immutable 1943 // so could be called without account's lock). 1944 // Server lock held on entry. 1945 func (s *Server) computeRoutePoolIdx(acc *Account) int { 1946 if s.routesPoolSize <= 1 { 1947 return 0 1948 } 1949 h := fnv.New32a() 1950 h.Write([]byte(acc.Name)) 1951 sum32 := h.Sum32() 1952 return int((sum32 % uint32(s.routesPoolSize))) 1953 } 1954 1955 // lookupAccount is a function to return the account structure 1956 // associated with an account name. 1957 // Lock MUST NOT be held upon entry. 1958 func (s *Server) lookupAccount(name string) (*Account, error) { 1959 var acc *Account 1960 if v, ok := s.accounts.Load(name); ok { 1961 acc = v.(*Account) 1962 } 1963 if acc != nil { 1964 // If we are expired and we have a resolver, then 1965 // return the latest information from the resolver. 
1966 if acc.IsExpired() { 1967 s.Debugf("Requested account [%s] has expired", name) 1968 if s.AccountResolver() != nil { 1969 if err := s.updateAccount(acc); err != nil { 1970 // This error could mask expired, so just return expired here. 1971 return nil, ErrAccountExpired 1972 } 1973 } else { 1974 return nil, ErrAccountExpired 1975 } 1976 } 1977 return acc, nil 1978 } 1979 // If we have a resolver see if it can fetch the account. 1980 if s.AccountResolver() == nil { 1981 return nil, ErrMissingAccount 1982 } 1983 return s.fetchAccount(name) 1984 } 1985 1986 // LookupAccount is a public function to return the account structure 1987 // associated with name. 1988 func (s *Server) LookupAccount(name string) (*Account, error) { 1989 return s.lookupAccount(name) 1990 } 1991 1992 // This will fetch new claims and if found update the account with new claims. 1993 // Lock MUST NOT be held upon entry. 1994 func (s *Server) updateAccount(acc *Account) error { 1995 acc.mu.RLock() 1996 // TODO(dlc) - Make configurable 1997 if !acc.incomplete && time.Since(acc.updated) < time.Second { 1998 acc.mu.RUnlock() 1999 s.Debugf("Requested account update for [%s] ignored, too soon", acc.Name) 2000 return ErrAccountResolverUpdateTooSoon 2001 } 2002 acc.mu.RUnlock() 2003 claimJWT, err := s.fetchRawAccountClaims(acc.Name) 2004 if err != nil { 2005 return err 2006 } 2007 return s.updateAccountWithClaimJWT(acc, claimJWT) 2008 } 2009 2010 // updateAccountWithClaimJWT will check and apply the claim update. 2011 // Lock MUST NOT be held upon entry. 
2012 func (s *Server) updateAccountWithClaimJWT(acc *Account, claimJWT string) error { 2013 if acc == nil { 2014 return ErrMissingAccount 2015 } 2016 acc.mu.RLock() 2017 sameClaim := acc.claimJWT != _EMPTY_ && acc.claimJWT == claimJWT && !acc.incomplete 2018 acc.mu.RUnlock() 2019 if sameClaim { 2020 s.Debugf("Requested account update for [%s], same claims detected", acc.Name) 2021 return nil 2022 } 2023 accClaims, _, err := s.verifyAccountClaims(claimJWT) 2024 if err == nil && accClaims != nil { 2025 acc.mu.Lock() 2026 if acc.Issuer == _EMPTY_ { 2027 acc.Issuer = accClaims.Issuer 2028 } 2029 if acc.Name != accClaims.Subject { 2030 acc.mu.Unlock() 2031 return ErrAccountValidation 2032 } 2033 acc.mu.Unlock() 2034 s.UpdateAccountClaims(acc, accClaims) 2035 acc.mu.Lock() 2036 // needs to be set after update completed. 2037 // This causes concurrent calls to return with sameClaim=true if the change is effective. 2038 acc.claimJWT = claimJWT 2039 acc.mu.Unlock() 2040 return nil 2041 } 2042 return err 2043 } 2044 2045 // fetchRawAccountClaims will grab raw account claims iff we have a resolver. 2046 // Lock is NOT held upon entry. 2047 func (s *Server) fetchRawAccountClaims(name string) (string, error) { 2048 accResolver := s.AccountResolver() 2049 if accResolver == nil { 2050 return _EMPTY_, ErrNoAccountResolver 2051 } 2052 // Need to do actual Fetch 2053 start := time.Now() 2054 claimJWT, err := fetchAccount(accResolver, name) 2055 fetchTime := time.Since(start) 2056 if fetchTime > time.Second { 2057 s.Warnf("Account [%s] fetch took %v", name, fetchTime) 2058 } else { 2059 s.Debugf("Account [%s] fetch took %v", name, fetchTime) 2060 } 2061 if err != nil { 2062 s.Warnf("Account fetch failed: %v", err) 2063 return "", err 2064 } 2065 return claimJWT, nil 2066 } 2067 2068 // fetchAccountClaims will attempt to fetch new claims if a resolver is present. 2069 // Lock is NOT held upon entry. 
2070 func (s *Server) fetchAccountClaims(name string) (*jwt.AccountClaims, string, error) { 2071 claimJWT, err := s.fetchRawAccountClaims(name) 2072 if err != nil { 2073 return nil, _EMPTY_, err 2074 } 2075 var claim *jwt.AccountClaims 2076 claim, claimJWT, err = s.verifyAccountClaims(claimJWT) 2077 if claim != nil && claim.Subject != name { 2078 return nil, _EMPTY_, ErrAccountValidation 2079 } 2080 return claim, claimJWT, err 2081 } 2082 2083 // verifyAccountClaims will decode and validate any account claims. 2084 func (s *Server) verifyAccountClaims(claimJWT string) (*jwt.AccountClaims, string, error) { 2085 accClaims, err := jwt.DecodeAccountClaims(claimJWT) 2086 if err != nil { 2087 return nil, _EMPTY_, err 2088 } 2089 if !s.isTrustedIssuer(accClaims.Issuer) { 2090 return nil, _EMPTY_, ErrAccountValidation 2091 } 2092 vr := jwt.CreateValidationResults() 2093 accClaims.Validate(vr) 2094 if vr.IsBlocking(true) { 2095 return nil, _EMPTY_, ErrAccountValidation 2096 } 2097 return accClaims, claimJWT, nil 2098 } 2099 2100 // This will fetch an account from a resolver if defined. 2101 // Lock is NOT held upon entry. 2102 func (s *Server) fetchAccount(name string) (*Account, error) { 2103 accClaims, claimJWT, err := s.fetchAccountClaims(name) 2104 if accClaims == nil { 2105 return nil, err 2106 } 2107 acc := s.buildInternalAccount(accClaims) 2108 acc.claimJWT = claimJWT 2109 // Due to possible race, if registerAccount() returns a non 2110 // nil account, it means the same account was already 2111 // registered and we should use this one. 2112 if racc := s.registerAccount(acc); racc != nil { 2113 // Update with the new claims in case they are new. 2114 if err = s.updateAccountWithClaimJWT(racc, claimJWT); err != nil { 2115 return nil, err 2116 } 2117 return racc, nil 2118 } 2119 // The sub imports may have been setup but will not have had their 2120 // subscriptions properly setup. Do that here. 
2121 var needImportSubs bool 2122 2123 acc.mu.Lock() 2124 if len(acc.imports.services) > 0 { 2125 if acc.ic == nil { 2126 acc.ic = s.createInternalAccountClient() 2127 acc.ic.acc = acc 2128 } 2129 needImportSubs = true 2130 } 2131 acc.mu.Unlock() 2132 2133 // Do these outside the lock. 2134 if needImportSubs { 2135 acc.addAllServiceImportSubs() 2136 } 2137 2138 return acc, nil 2139 } 2140 2141 // Start up the server, this will not block. 2142 // 2143 // WaitForShutdown can be used to block and wait for the server to shutdown properly if needed 2144 // after calling s.Shutdown() 2145 func (s *Server) Start() { 2146 s.Noticef("Starting nats-server") 2147 2148 gc := gitCommit 2149 if gc == _EMPTY_ { 2150 gc = "not set" 2151 } 2152 2153 // Snapshot server options. 2154 opts := s.getOpts() 2155 clusterName := s.ClusterName() 2156 2157 s.Noticef(" Version: %s", VERSION) 2158 s.Noticef(" Git: [%s]", gc) 2159 s.Debugf(" Go build: %s", s.info.GoVersion) 2160 if clusterName != _EMPTY_ { 2161 s.Noticef(" Cluster: %s", clusterName) 2162 } 2163 s.Noticef(" Name: %s", s.info.Name) 2164 if opts.JetStream { 2165 s.Noticef(" Node: %s", getHash(s.info.Name)) 2166 } 2167 s.Noticef(" ID: %s", s.info.ID) 2168 2169 defer s.Noticef("Server is ready") 2170 2171 // Check for insecure configurations. 2172 s.checkAuthforWarnings() 2173 2174 // Avoid RACE between Start() and Shutdown() 2175 s.running.Store(true) 2176 s.mu.Lock() 2177 // Update leafNodeEnabled in case options have changed post NewServer() 2178 // and before Start() (we should not be able to allow that, but server has 2179 // direct reference to user-provided options - at least before a Reload() is 2180 // performed. 2181 s.leafNodeEnabled = opts.LeafNode.Port != 0 || len(opts.LeafNode.Remotes) > 0 2182 s.mu.Unlock() 2183 2184 s.grMu.Lock() 2185 s.grRunning = true 2186 s.grMu.Unlock() 2187 2188 s.startRateLimitLogExpiration() 2189 2190 // Pprof http endpoint for the profiler. 
2191 if opts.ProfPort != 0 { 2192 s.StartProfiler() 2193 } else { 2194 // It's still possible to access this profile via a SYS endpoint, so set 2195 // this anyway. (Otherwise StartProfiler would have called it.) 2196 s.setBlockProfileRate(opts.ProfBlockRate) 2197 } 2198 2199 if opts.ConfigFile != _EMPTY_ { 2200 s.Noticef("Using configuration file: %s", opts.ConfigFile) 2201 } 2202 2203 hasOperators := len(opts.TrustedOperators) > 0 2204 if hasOperators { 2205 s.Noticef("Trusted Operators") 2206 } 2207 for _, opc := range opts.TrustedOperators { 2208 s.Noticef(" System : %q", opc.Audience) 2209 s.Noticef(" Operator: %q", opc.Name) 2210 s.Noticef(" Issued : %v", time.Unix(opc.IssuedAt, 0)) 2211 switch opc.Expires { 2212 case 0: 2213 s.Noticef(" Expires : Never") 2214 default: 2215 s.Noticef(" Expires : %v", time.Unix(opc.Expires, 0)) 2216 } 2217 } 2218 if hasOperators && opts.SystemAccount == _EMPTY_ { 2219 s.Warnf("Trusted Operators should utilize a System Account") 2220 } 2221 if opts.MaxPayload > MAX_PAYLOAD_MAX_SIZE { 2222 s.Warnf("Maximum payloads over %v are generally discouraged and could lead to poor performance", 2223 friendlyBytes(int64(MAX_PAYLOAD_MAX_SIZE))) 2224 } 2225 2226 if len(opts.JsAccDefaultDomain) > 0 { 2227 s.Warnf("The option `default_js_domain` is a temporary backwards compatibility measure and will be removed") 2228 } 2229 2230 // If we have a memory resolver, check the accounts here for validation exceptions. 2231 // This allows them to be logged right away vs when they are accessed via a client. 2232 if hasOperators && len(opts.resolverPreloads) > 0 { 2233 s.checkResolvePreloads() 2234 } 2235 2236 // Log the pid to a file. 2237 if opts.PidFile != _EMPTY_ { 2238 if err := s.logPid(); err != nil { 2239 s.Fatalf("Could not write pidfile: %v", err) 2240 return 2241 } 2242 } 2243 2244 // Setup system account which will start the eventing stack. 
2245 if sa := opts.SystemAccount; sa != _EMPTY_ { 2246 if err := s.SetSystemAccount(sa); err != nil { 2247 s.Fatalf("Can't set system account: %v", err) 2248 return 2249 } 2250 } else if !opts.NoSystemAccount { 2251 // We will create a default system account here. 2252 s.SetDefaultSystemAccount() 2253 } 2254 2255 // Start monitoring before enabling other subsystems of the 2256 // server to be able to monitor during startup. 2257 if err := s.StartMonitoring(); err != nil { 2258 s.Fatalf("Can't start monitoring: %v", err) 2259 return 2260 } 2261 2262 // Start up resolver machinery. 2263 if ar := s.AccountResolver(); ar != nil { 2264 if err := ar.Start(s); err != nil { 2265 s.Fatalf("Could not start resolver: %v", err) 2266 return 2267 } 2268 // In operator mode, when the account resolver depends on an external system and 2269 // the system account is the bootstrapping account, start fetching it. 2270 if len(opts.TrustedOperators) == 1 && opts.SystemAccount != _EMPTY_ && opts.SystemAccount != DEFAULT_SYSTEM_ACCOUNT { 2271 opts := s.getOpts() 2272 _, isMemResolver := ar.(*MemAccResolver) 2273 if v, ok := s.accounts.Load(opts.SystemAccount); !isMemResolver && ok && v.(*Account).claimJWT == _EMPTY_ { 2274 s.Noticef("Using bootstrapping system account") 2275 s.startGoRoutine(func() { 2276 defer s.grWG.Done() 2277 t := time.NewTicker(time.Second) 2278 defer t.Stop() 2279 for { 2280 select { 2281 case <-s.quitCh: 2282 return 2283 case <-t.C: 2284 sacc := s.SystemAccount() 2285 if claimJWT, err := fetchAccount(ar, opts.SystemAccount); err != nil { 2286 continue 2287 } else if err = s.updateAccountWithClaimJWT(sacc, claimJWT); err != nil { 2288 continue 2289 } 2290 s.Noticef("System account fetched and updated") 2291 return 2292 } 2293 } 2294 }) 2295 } 2296 } 2297 } 2298 2299 // Start expiration of mapped GW replies, regardless if 2300 // this server is configured with gateway or not. 2301 s.startGWReplyMapExpiration() 2302 2303 // Check if JetStream has been enabled. 
This needs to be after 2304 // the system account setup above. JetStream will create its 2305 // own system account if one is not present. 2306 if opts.JetStream { 2307 // Make sure someone is not trying to enable on the system account. 2308 if sa := s.SystemAccount(); sa != nil && len(sa.jsLimits) > 0 { 2309 s.Fatalf("Not allowed to enable JetStream on the system account") 2310 } 2311 cfg := &JetStreamConfig{ 2312 StoreDir: opts.StoreDir, 2313 SyncInterval: opts.SyncInterval, 2314 SyncAlways: opts.SyncAlways, 2315 MaxMemory: opts.JetStreamMaxMemory, 2316 MaxStore: opts.JetStreamMaxStore, 2317 Domain: opts.JetStreamDomain, 2318 CompressOK: true, 2319 UniqueTag: opts.JetStreamUniqueTag, 2320 } 2321 if err := s.EnableJetStream(cfg); err != nil { 2322 s.Fatalf("Can't start JetStream: %v", err) 2323 return 2324 } 2325 } else { 2326 // Check to see if any configured accounts have JetStream enabled. 2327 sa, ga := s.SystemAccount(), s.GlobalAccount() 2328 var hasSys, hasGlobal bool 2329 var total int 2330 2331 s.accounts.Range(func(k, v interface{}) bool { 2332 total++ 2333 acc := v.(*Account) 2334 if acc == sa { 2335 hasSys = true 2336 } else if acc == ga { 2337 hasGlobal = true 2338 } 2339 acc.mu.RLock() 2340 hasJs := len(acc.jsLimits) > 0 2341 acc.mu.RUnlock() 2342 if hasJs { 2343 s.checkJetStreamExports() 2344 acc.enableAllJetStreamServiceImportsAndMappings() 2345 } 2346 return true 2347 }) 2348 // If we only have the system account and the global account and we are not standalone, 2349 // go ahead and enable JS on $G in case we are in simple mixed mode setup. 2350 if total == 2 && hasSys && hasGlobal && !s.standAloneMode() { 2351 ga.mu.Lock() 2352 ga.jsLimits = map[string]JetStreamAccountLimits{ 2353 _EMPTY_: dynamicJSAccountLimits, 2354 } 2355 ga.mu.Unlock() 2356 s.checkJetStreamExports() 2357 ga.enableAllJetStreamServiceImportsAndMappings() 2358 } 2359 } 2360 2361 // Start OCSP Stapling monitoring for TLS certificates if enabled. 
Hook TLS handshake for 2362 // OCSP check on peers (LEAF and CLIENT kind) if enabled. 2363 s.startOCSPMonitoring() 2364 2365 // Configure OCSP Response Cache for peer OCSP checks if enabled. 2366 s.initOCSPResponseCache() 2367 2368 // Start up gateway if needed. Do this before starting the routes, because 2369 // we want to resolve the gateway host:port so that this information can 2370 // be sent to other routes. 2371 if opts.Gateway.Port != 0 { 2372 s.startGateways() 2373 } 2374 2375 // Start websocket server if needed. Do this before starting the routes, and 2376 // leaf node because we want to resolve the gateway host:port so that this 2377 // information can be sent to other routes. 2378 if opts.Websocket.Port != 0 { 2379 s.startWebsocketServer() 2380 } 2381 2382 // Start up listen if we want to accept leaf node connections. 2383 if opts.LeafNode.Port != 0 { 2384 // Will resolve or assign the advertise address for the leafnode listener. 2385 // We need that in StartRouting(). 2386 s.startLeafNodeAcceptLoop() 2387 } 2388 2389 // Solicit remote servers for leaf node connections. 2390 if len(opts.LeafNode.Remotes) > 0 { 2391 s.solicitLeafNodeRemotes(opts.LeafNode.Remotes) 2392 } 2393 2394 // TODO (ik): I wanted to refactor this by starting the client 2395 // accept loop first, that is, it would resolve listen spec 2396 // in place, but start the accept-for-loop in a different go 2397 // routine. This would get rid of the synchronization between 2398 // this function and StartRouting, which I also would have wanted 2399 // to refactor, but both AcceptLoop() and StartRouting() have 2400 // been exported and not sure if that would break users using them. 2401 // We could mark them as deprecated and remove in a release or two... 2402 2403 // The Routing routine needs to wait for the client listen 2404 // port to be opened and potential ephemeral port selected. 
2405 clientListenReady := make(chan struct{}) 2406 2407 // MQTT 2408 if opts.MQTT.Port != 0 { 2409 s.startMQTT() 2410 } 2411 2412 // Start up routing as well if needed. 2413 if opts.Cluster.Port != 0 { 2414 s.startGoRoutine(func() { 2415 s.StartRouting(clientListenReady) 2416 }) 2417 } 2418 2419 if opts.PortsFileDir != _EMPTY_ { 2420 s.logPorts() 2421 } 2422 2423 if opts.TLSRateLimit > 0 { 2424 s.startGoRoutine(s.logRejectedTLSConns) 2425 } 2426 2427 // We've finished starting up. 2428 close(s.startupComplete) 2429 2430 // Wait for clients. 2431 if !opts.DontListen { 2432 s.AcceptLoop(clientListenReady) 2433 } 2434 2435 // Bring OSCP Response cache online after accept loop started in anticipation of NATS-enabled cache types 2436 s.startOCSPResponseCache() 2437 } 2438 2439 func (s *Server) isShuttingDown() bool { 2440 return s.shutdown.Load() 2441 } 2442 2443 // Shutdown will shutdown the server instance by kicking out the AcceptLoop 2444 // and closing all associated clients. 2445 func (s *Server) Shutdown() { 2446 if s == nil { 2447 return 2448 } 2449 // This is for JetStream R1 Pull Consumers to allow signaling 2450 // that pending pull requests are invalid. 2451 s.signalPullConsumers() 2452 2453 // Transfer off any raft nodes that we are a leader by stepping them down. 2454 s.stepdownRaftNodes() 2455 2456 // Shutdown the eventing system as needed. 2457 // This is done first to send out any messages for 2458 // account status. We will also clean up any 2459 // eventing items associated with accounts. 2460 s.shutdownEventing() 2461 2462 // Prevent issues with multiple calls. 
2463 if s.isShuttingDown() { 2464 return 2465 } 2466 2467 s.mu.Lock() 2468 s.Noticef("Initiating Shutdown...") 2469 2470 accRes := s.accResolver 2471 2472 opts := s.getOpts() 2473 2474 s.shutdown.Store(true) 2475 s.running.Store(false) 2476 s.grMu.Lock() 2477 s.grRunning = false 2478 s.grMu.Unlock() 2479 s.mu.Unlock() 2480 2481 if accRes != nil { 2482 accRes.Close() 2483 } 2484 2485 // Now check and shutdown jetstream. 2486 s.shutdownJetStream() 2487 2488 // Now shutdown the nodes 2489 s.shutdownRaftNodes() 2490 2491 s.mu.Lock() 2492 conns := make(map[uint64]*client) 2493 2494 // Copy off the clients 2495 for i, c := range s.clients { 2496 conns[i] = c 2497 } 2498 // Copy off the connections that are not yet registered 2499 // in s.routes, but for which the readLoop has started 2500 s.grMu.Lock() 2501 for i, c := range s.grTmpClients { 2502 conns[i] = c 2503 } 2504 s.grMu.Unlock() 2505 // Copy off the routes 2506 s.forEachRoute(func(r *client) { 2507 r.mu.Lock() 2508 conns[r.cid] = r 2509 r.mu.Unlock() 2510 }) 2511 // Copy off the gateways 2512 s.getAllGatewayConnections(conns) 2513 2514 // Copy off the leaf nodes 2515 for i, c := range s.leafs { 2516 conns[i] = c 2517 } 2518 2519 // Number of done channel responses we expect. 
2520 doneExpected := 0 2521 2522 // Kick client AcceptLoop() 2523 if s.listener != nil { 2524 doneExpected++ 2525 s.listener.Close() 2526 s.listener = nil 2527 } 2528 2529 // Kick websocket server 2530 if s.websocket.server != nil { 2531 doneExpected++ 2532 s.websocket.server.Close() 2533 s.websocket.server = nil 2534 s.websocket.listener = nil 2535 } 2536 2537 // Kick MQTT accept loop 2538 if s.mqtt.listener != nil { 2539 doneExpected++ 2540 s.mqtt.listener.Close() 2541 s.mqtt.listener = nil 2542 } 2543 2544 // Kick leafnodes AcceptLoop() 2545 if s.leafNodeListener != nil { 2546 doneExpected++ 2547 s.leafNodeListener.Close() 2548 s.leafNodeListener = nil 2549 } 2550 2551 // Kick route AcceptLoop() 2552 if s.routeListener != nil { 2553 doneExpected++ 2554 s.routeListener.Close() 2555 s.routeListener = nil 2556 } 2557 2558 // Kick Gateway AcceptLoop() 2559 if s.gatewayListener != nil { 2560 doneExpected++ 2561 s.gatewayListener.Close() 2562 s.gatewayListener = nil 2563 } 2564 2565 // Kick HTTP monitoring if its running 2566 if s.http != nil { 2567 doneExpected++ 2568 s.http.Close() 2569 s.http = nil 2570 } 2571 2572 // Kick Profiling if its running 2573 if s.profiler != nil { 2574 doneExpected++ 2575 s.profiler.Close() 2576 } 2577 2578 s.mu.Unlock() 2579 2580 // Release go routines that wait on that channel 2581 close(s.quitCh) 2582 2583 // Close client and route connections 2584 for _, c := range conns { 2585 c.setNoReconnect() 2586 c.closeConnection(ServerShutdown) 2587 } 2588 2589 // Block until the accept loops exit 2590 for doneExpected > 0 { 2591 <-s.done 2592 doneExpected-- 2593 } 2594 2595 // Wait for go routines to be done. 2596 s.grWG.Wait() 2597 2598 if opts.PortsFileDir != _EMPTY_ { 2599 s.deletePortsFile(opts.PortsFileDir) 2600 } 2601 2602 s.Noticef("Server Exiting..") 2603 2604 // Stop OCSP Response Cache 2605 if s.ocsprc != nil { 2606 s.ocsprc.Stop(s) 2607 } 2608 2609 // Close logger if applicable. 
It allows tests on Windows 2610 // to be able to do proper cleanup (delete log file). 2611 s.logging.RLock() 2612 log := s.logging.logger 2613 s.logging.RUnlock() 2614 if log != nil { 2615 if l, ok := log.(*logger.Logger); ok { 2616 l.Close() 2617 } 2618 } 2619 // Notify that the shutdown is complete 2620 close(s.shutdownComplete) 2621 } 2622 2623 // WaitForShutdown will block until the server has been fully shutdown. 2624 func (s *Server) WaitForShutdown() { 2625 <-s.shutdownComplete 2626 } 2627 2628 // AcceptLoop is exported for easier testing. 2629 func (s *Server) AcceptLoop(clr chan struct{}) { 2630 // If we were to exit before the listener is setup properly, 2631 // make sure we close the channel. 2632 defer func() { 2633 if clr != nil { 2634 close(clr) 2635 } 2636 }() 2637 2638 if s.isShuttingDown() { 2639 return 2640 } 2641 2642 // Snapshot server options. 2643 opts := s.getOpts() 2644 2645 // Setup state that can enable shutdown 2646 s.mu.Lock() 2647 hp := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port)) 2648 l, e := s.network.ListenCause("tcp", hp, "client") 2649 s.listenerErr = e 2650 if e != nil { 2651 s.mu.Unlock() 2652 s.Fatalf("Error listening on port: %s, %q", hp, e) 2653 return 2654 } 2655 s.Noticef("Listening for client connections on %s", 2656 net.JoinHostPort(opts.Host, strconv.Itoa(l.Addr().(*net.TCPAddr).Port))) 2657 2658 // Alert of TLS enabled. 2659 if opts.TLSConfig != nil { 2660 s.Noticef("TLS required for client connections") 2661 if opts.TLSHandshakeFirst && opts.TLSHandshakeFirstFallback == 0 { 2662 s.Warnf("Clients that are not using \"TLS Handshake First\" option will fail to connect") 2663 } 2664 } 2665 2666 // If server was started with RANDOM_PORT (-1), opts.Port would be equal 2667 // to 0 at the beginning this function. So we need to get the actual port 2668 if opts.Port == 0 { 2669 // Write resolved port back to options. 
2670 opts.Port = l.Addr().(*net.TCPAddr).Port 2671 } 2672 2673 // Now that port has been set (if it was set to RANDOM), set the 2674 // server's info Host/Port with either values from Options or 2675 // ClientAdvertise. 2676 if err := s.setInfoHostPort(); err != nil { 2677 s.Fatalf("Error setting server INFO with ClientAdvertise value of %s, err=%v", opts.ClientAdvertise, err) 2678 l.Close() 2679 s.mu.Unlock() 2680 return 2681 } 2682 // Keep track of client connect URLs. We may need them later. 2683 s.clientConnectURLs = s.getClientConnectURLs() 2684 s.listener = l 2685 2686 go s.acceptConnections(l, "Client", func(conn net.Conn) { s.createClient(conn) }, 2687 func(_ error) bool { 2688 if s.isLameDuckMode() { 2689 // Signal that we are not accepting new clients 2690 s.ldmCh <- true 2691 // Now wait for the Shutdown... 2692 <-s.quitCh 2693 return true 2694 } 2695 return false 2696 }) 2697 s.mu.Unlock() 2698 2699 // Let the caller know that we are ready 2700 close(clr) 2701 clr = nil 2702 } 2703 2704 // InProcessConn returns an in-process connection to the server, 2705 // avoiding the need to use a TCP listener for local connectivity 2706 // within the same process. This can be used regardless of the 2707 // state of the DontListen option. 2708 func (s *Server) InProcessConn() (net.Conn, error) { 2709 pl, pr := net.Pipe() 2710 if e := s.RegisterExternalConn(pl); e != nil { 2711 pl.Close() 2712 pr.Close() 2713 return nil, e 2714 } 2715 return pr, nil 2716 } 2717 2718 // RegisterConn will register an externally accepted connection with the server. 2719 // This is used for in-process connections and for testing. 
2720 func (s *Server) RegisterExternalConn(con net.Conn) error { 2721 if !s.startGoRoutine(func() { 2722 s.createClientInProcess(con) 2723 s.grWG.Done() 2724 }) { 2725 return fmt.Errorf("failed to create connection") 2726 } 2727 return nil 2728 } 2729 2730 func (s *Server) acceptConnections(l net.Listener, acceptName string, createFunc func(conn net.Conn), errFunc func(err error) bool) { 2731 tmpDelay := ACCEPT_MIN_SLEEP 2732 2733 for { 2734 conn, err := l.Accept() 2735 if err != nil { 2736 if errFunc != nil && errFunc(err) { 2737 return 2738 } 2739 if tmpDelay = s.acceptError(acceptName, err, tmpDelay); tmpDelay < 0 { 2740 break 2741 } 2742 continue 2743 } 2744 tmpDelay = ACCEPT_MIN_SLEEP 2745 if !s.startGoRoutine(func() { 2746 createFunc(conn) 2747 s.grWG.Done() 2748 }) { 2749 conn.Close() 2750 } 2751 } 2752 s.Debugf(acceptName + " accept loop exiting..") 2753 s.done <- true 2754 } 2755 2756 // This function sets the server's info Host/Port based on server Options. 2757 // Note that this function may be called during config reload, this is why 2758 // Host/Port may be reset to original Options if the ClientAdvertise option 2759 // is not set (since it may have previously been). 2760 func (s *Server) setInfoHostPort() error { 2761 // When this function is called, opts.Port is set to the actual listen 2762 // port (if option was originally set to RANDOM), even during a config 2763 // reload. So use of s.opts.Port is safe. 2764 opts := s.getOpts() 2765 if opts.ClientAdvertise != _EMPTY_ { 2766 h, p, err := parseHostPort(opts.ClientAdvertise, opts.Port) 2767 if err != nil { 2768 return err 2769 } 2770 s.info.Host = h 2771 s.info.Port = p 2772 } else { 2773 s.info.Host = opts.Host 2774 s.info.Port = opts.Port 2775 } 2776 return nil 2777 } 2778 2779 // StartProfiler is called to enable dynamic profiling. 2780 func (s *Server) StartProfiler() { 2781 if s.isShuttingDown() { 2782 return 2783 } 2784 2785 // Snapshot server options. 
2786 opts := s.getOpts() 2787 2788 port := opts.ProfPort 2789 2790 // Check for Random Port 2791 if port == -1 { 2792 port = 0 2793 } 2794 2795 s.mu.Lock() 2796 hp := net.JoinHostPort(opts.Host, strconv.Itoa(port)) 2797 l, err := s.network.ListenCause("tcp", hp, "profiler") 2798 2799 if err != nil { 2800 s.mu.Unlock() 2801 s.Fatalf("error starting profiler: %s", err) 2802 return 2803 } 2804 s.Noticef("profiling port: %d", l.Addr().(*net.TCPAddr).Port) 2805 2806 srv := &http.Server{ 2807 Addr: hp, 2808 Handler: http.DefaultServeMux, 2809 MaxHeaderBytes: 1 << 20, 2810 } 2811 s.profiler = l 2812 s.profilingServer = srv 2813 2814 s.setBlockProfileRate(opts.ProfBlockRate) 2815 2816 go func() { 2817 // if this errors out, it's probably because the server is being shutdown 2818 err := srv.Serve(l) 2819 if err != nil { 2820 if !s.isShuttingDown() { 2821 s.Fatalf("error starting profiler: %s", err) 2822 } 2823 } 2824 srv.Close() 2825 s.done <- true 2826 }() 2827 s.mu.Unlock() 2828 } 2829 2830 func (s *Server) setBlockProfileRate(rate int) { 2831 // Passing i ProfBlockRate <= 0 here will disable or > 0 will enable. 2832 runtime.SetBlockProfileRate(rate) 2833 2834 if rate > 0 { 2835 s.Warnf("Block profiling is enabled (rate %d), this may have a performance impact", rate) 2836 } 2837 } 2838 2839 // StartHTTPMonitoring will enable the HTTP monitoring port. 2840 // DEPRECATED: Should use StartMonitoring. 2841 func (s *Server) StartHTTPMonitoring() { 2842 s.startMonitoring(false) 2843 } 2844 2845 // StartHTTPSMonitoring will enable the HTTPS monitoring port. 2846 // DEPRECATED: Should use StartMonitoring. 2847 func (s *Server) StartHTTPSMonitoring() { 2848 s.startMonitoring(true) 2849 } 2850 2851 // StartMonitoring starts the HTTP or HTTPs server if needed. 2852 func (s *Server) StartMonitoring() error { 2853 // Snapshot server options. 
2854 opts := s.getOpts() 2855 2856 // Specifying both HTTP and HTTPS ports is a misconfiguration 2857 if opts.HTTPPort != 0 && opts.HTTPSPort != 0 { 2858 return fmt.Errorf("can't specify both HTTP (%v) and HTTPs (%v) ports", opts.HTTPPort, opts.HTTPSPort) 2859 } 2860 var err error 2861 if opts.HTTPPort != 0 { 2862 err = s.startMonitoring(false) 2863 } else if opts.HTTPSPort != 0 { 2864 if opts.TLSConfig == nil { 2865 return fmt.Errorf("TLS cert and key required for HTTPS") 2866 } 2867 err = s.startMonitoring(true) 2868 } 2869 return err 2870 } 2871 2872 // HTTP endpoints 2873 const ( 2874 RootPath = "/" 2875 VarzPath = "/varz" 2876 ConnzPath = "/connz" 2877 RoutezPath = "/routez" 2878 GatewayzPath = "/gatewayz" 2879 LeafzPath = "/leafz" 2880 SubszPath = "/subsz" 2881 StackszPath = "/stacksz" 2882 AccountzPath = "/accountz" 2883 AccountStatzPath = "/accstatz" 2884 JszPath = "/jsz" 2885 HealthzPath = "/healthz" 2886 IPQueuesPath = "/ipqueuesz" 2887 ) 2888 2889 func (s *Server) basePath(p string) string { 2890 return path.Join(s.httpBasePath, p) 2891 } 2892 2893 type captureHTTPServerLog struct { 2894 s *Server 2895 prefix string 2896 } 2897 2898 func (cl *captureHTTPServerLog) Write(p []byte) (int, error) { 2899 var buf [128]byte 2900 var b = buf[:0] 2901 2902 b = append(b, []byte(cl.prefix)...) 2903 offset := 0 2904 if bytes.HasPrefix(p, []byte("http:")) { 2905 offset = 6 2906 } 2907 b = append(b, p[offset:]...) 2908 cl.s.Errorf(string(b)) 2909 return len(p), nil 2910 } 2911 2912 // The TLS configuration is passed to the listener when the monitoring 2913 // "server" is setup. That prevents TLS configuration updates on reload 2914 // from being used. By setting this function in tls.Config.GetConfigForClient 2915 // we instruct the TLS handshake to ask for the tls configuration to be 2916 // used for a specific client. We don't care which client, we always use 2917 // the same TLS configuration. 
2918 func (s *Server) getMonitoringTLSConfig(_ *tls.ClientHelloInfo) (*tls.Config, error) { 2919 opts := s.getOpts() 2920 tc := opts.TLSConfig.Clone() 2921 tc.ClientAuth = tls.NoClientCert 2922 return tc, nil 2923 } 2924 2925 // Start the monitoring server 2926 func (s *Server) startMonitoring(secure bool) error { 2927 if s.isShuttingDown() { 2928 return nil 2929 } 2930 2931 // Snapshot server options. 2932 opts := s.getOpts() 2933 2934 var ( 2935 hp string 2936 err error 2937 httpListener net.Listener 2938 port int 2939 ) 2940 2941 monitorProtocol := "http" 2942 2943 if secure { 2944 monitorProtocol += "s" 2945 port = opts.HTTPSPort 2946 if port == -1 { 2947 port = 0 2948 } 2949 hp = net.JoinHostPort(opts.HTTPHost, strconv.Itoa(port)) 2950 config := opts.TLSConfig.Clone() 2951 config.GetConfigForClient = s.getMonitoringTLSConfig 2952 config.ClientAuth = tls.NoClientCert 2953 httpListener, err = tls.Listen("tcp", hp, config) 2954 2955 } else { 2956 port = opts.HTTPPort 2957 if port == -1 { 2958 port = 0 2959 } 2960 hp = net.JoinHostPort(opts.HTTPHost, strconv.Itoa(port)) 2961 httpListener, err = s.network.ListenCause("tcp", hp, "monitor") 2962 } 2963 2964 if err != nil { 2965 return fmt.Errorf("can't listen to the monitor port: %v", err) 2966 } 2967 2968 rport := httpListener.Addr().(*net.TCPAddr).Port 2969 s.Noticef("Starting %s monitor on %s", monitorProtocol, net.JoinHostPort(opts.HTTPHost, strconv.Itoa(rport))) 2970 2971 mux := http.NewServeMux() 2972 2973 // Root 2974 mux.HandleFunc(s.basePath(RootPath), s.HandleRoot) 2975 // Varz 2976 mux.HandleFunc(s.basePath(VarzPath), s.HandleVarz) 2977 // Connz 2978 mux.HandleFunc(s.basePath(ConnzPath), s.HandleConnz) 2979 // Routez 2980 mux.HandleFunc(s.basePath(RoutezPath), s.HandleRoutez) 2981 // Gatewayz 2982 mux.HandleFunc(s.basePath(GatewayzPath), s.HandleGatewayz) 2983 // Leafz 2984 mux.HandleFunc(s.basePath(LeafzPath), s.HandleLeafz) 2985 // Subz 2986 mux.HandleFunc(s.basePath(SubszPath), s.HandleSubsz) 2987 // 
Subz alias for backwards compatibility 2988 mux.HandleFunc(s.basePath("/subscriptionsz"), s.HandleSubsz) 2989 // Stacksz 2990 mux.HandleFunc(s.basePath(StackszPath), s.HandleStacksz) 2991 // Accountz 2992 mux.HandleFunc(s.basePath(AccountzPath), s.HandleAccountz) 2993 // Accstatz 2994 mux.HandleFunc(s.basePath(AccountStatzPath), s.HandleAccountStatz) 2995 // Jsz 2996 mux.HandleFunc(s.basePath(JszPath), s.HandleJsz) 2997 // Healthz 2998 mux.HandleFunc(s.basePath(HealthzPath), s.HandleHealthz) 2999 // IPQueuesz 3000 mux.HandleFunc(s.basePath(IPQueuesPath), s.HandleIPQueuesz) 3001 3002 // Do not set a WriteTimeout because it could cause cURL/browser 3003 // to return empty response or unable to display page if the 3004 // server needs more time to build the response. 3005 srv := &http.Server{ 3006 Addr: hp, 3007 Handler: mux, 3008 MaxHeaderBytes: 1 << 20, 3009 ErrorLog: log.New(&captureHTTPServerLog{s, "monitoring: "}, _EMPTY_, 0), 3010 } 3011 s.mu.Lock() 3012 s.http = httpListener 3013 s.httpHandler = mux 3014 s.monitoringServer = srv 3015 s.mu.Unlock() 3016 3017 go func() { 3018 if err := srv.Serve(httpListener); err != nil { 3019 if !s.isShuttingDown() { 3020 s.Fatalf("Error starting monitor on %q: %v", hp, err) 3021 } 3022 } 3023 srv.Close() 3024 s.mu.Lock() 3025 s.httpHandler = nil 3026 s.mu.Unlock() 3027 s.done <- true 3028 }() 3029 3030 return nil 3031 } 3032 3033 // HTTPHandler returns the http.Handler object used to handle monitoring 3034 // endpoints. It will return nil if the server is not configured for 3035 // monitoring, or if the server has not been started yet (Server.Start()). 3036 func (s *Server) HTTPHandler() http.Handler { 3037 s.mu.Lock() 3038 defer s.mu.Unlock() 3039 return s.httpHandler 3040 } 3041 3042 // Perform a conditional deep copy due to reference nature of [Client|WS]ConnectURLs. 3043 // If updates are made to Info, this function should be consulted and updated. 3044 // Assume lock is held. 
func (s *Server) copyInfo() Info {
	// Start from a shallow copy of the server's Info value.
	info := s.info
	// The URL slices share backing storage with s.info, so clone them to give
	// the caller slices it can keep after the lock is released.
	if len(info.ClientConnectURLs) > 0 {
		info.ClientConnectURLs = append([]string(nil), s.info.ClientConnectURLs...)
	}
	if len(info.WSConnectURLs) > 0 {
		info.WSConnectURLs = append([]string(nil), s.info.WSConnectURLs...)
	}
	return info
}

// tlsMixConn is used when we can receive both TLS and non-TLS connections on same port.
// It wraps a net.Conn and replays the bytes held in pre before reading from
// the underlying connection again.
type tlsMixConn struct {
	net.Conn
	pre *bytes.Buffer
}

// Read for our mixed multi-reader.
// Data is served from the pre-read buffer first; once that buffer is drained
// it is dropped and all further reads go straight to the wrapped connection.
func (c *tlsMixConn) Read(b []byte) (int, error) {
	if c.pre != nil {
		n, err := c.pre.Read(b)
		if c.pre.Len() == 0 {
			c.pre = nil
		}
		return n, err
	}
	return c.Conn.Read(b)
}

// createClient creates a client for conn that is not an in-process connection.
func (s *Server) createClient(conn net.Conn) *client {
	return s.createClientEx(conn, false)
}

// createClientInProcess creates a client for conn, flagged as an in-process
// connection.
func (s *Server) createClientInProcess(conn net.Conn) *client {
	return s.createClientEx(conn, true)
}

// createClientEx creates the client object for conn, sends the INFO protocol,
// performs the server-side TLS handshake when required (including the
// "TLS first" mode and detection of TLS vs non-TLS on the same port),
// registers the client with the server and starts its read and write loops.
//
// It returns nil when the connection is rejected or fails during setup
// (max connections exceeded, connection throttling, TLS handshake failure, or
// the connection being closed while INFO is sent). When the server is not
// running or is in lame duck mode, the client is returned without being
// registered and without its loops being started.
func (s *Server) createClientEx(conn net.Conn, inProcess bool) *client {
	// Snapshot server options.
	opts := s.getOpts()

	maxPay := int32(opts.MaxPayload)
	maxSubs := int32(opts.MaxSubs)
	// For system, maxSubs of 0 means unlimited, so re-adjust here.
	if maxSubs == 0 {
		maxSubs = -1
	}
	now := time.Now()

	c := &client{srv: s, nc: conn, opts: defaultOpts, mpay: maxPay, msubs: maxSubs, start: now, last: now}

	c.registerWithAccount(s.globalAccount())

	var info Info
	var authRequired bool

	s.mu.Lock()
	// Grab JSON info string
	info = s.copyInfo()
	if s.nonceRequired() {
		// Nonce handling
		var raw [nonceLen]byte
		nonce := raw[:]
		s.generateNonce(nonce)
		info.Nonce = string(nonce)
	}
	c.nonce = []byte(info.Nonce)
	// Remember the server-wide setting before it is possibly cleared below
	// for the INFO sent to this client.
	authRequired = info.AuthRequired

	// Check to see if we have auth_required set but we also have a no_auth_user.
	// If so set back to false.
	if info.AuthRequired && opts.NoAuthUser != _EMPTY_ && opts.NoAuthUser != s.sysAccOnlyNoAuthUser {
		info.AuthRequired = false
	}

	// Check to see if this is an in-process connection with tls_required.
	// If so, set as not required, but available.
	if inProcess && info.TLSRequired {
		info.TLSRequired = false
		info.TLSAvailable = true
	}

	s.totalClients++
	s.mu.Unlock()

	// Grab lock
	c.mu.Lock()
	if authRequired {
		c.flags.set(expectConnect)
	}

	// Initialize
	c.initClient()

	c.Debugf("Client connection created")

	// Save info.TLSRequired value since we may need to change it back and forth.
	orgInfoTLSReq := info.TLSRequired

	var tlsFirstFallback time.Duration
	// Check if we should do TLS first.
	tlsFirst := opts.TLSConfig != nil && opts.TLSHandshakeFirst
	if tlsFirst {
		// Make sure info.TLSRequired is set to true (it could be false
		// if AllowNonTLS is enabled).
		info.TLSRequired = true
		// Get the fallback delay value if applicable.
		if f := opts.TLSHandshakeFirstFallback; f > 0 {
			tlsFirstFallback = f
		} else if inProcess {
			// For in-process connection, we will always have a fallback
			// delay. It allows support for non-TLS, TLS and "TLS First"
			// in-process clients to successfully connect.
			tlsFirstFallback = DEFAULT_TLS_HANDSHAKE_FIRST_FALLBACK_DELAY
		}
	}

	// Decide if we are going to require TLS or not and generate INFO json.
	tlsRequired := info.TLSRequired
	infoBytes := c.generateClientInfoJSON(info)

	// Send our information, except if TLS and TLSHandshakeFirst is requested.
	if !tlsFirst {
		// Need to be sent in place since writeLoop cannot be started until
		// TLS handshake is done (if applicable).
		c.sendProtoNow(infoBytes)
	}

	// Unlock to register
	c.mu.Unlock()

	// Register with the server.
	s.mu.Lock()
	// If server is not running, Shutdown() may have already gathered the
	// list of connections to close. It won't contain this one, so we need
	// to bail out now otherwise the readLoop started down there would not
	// be interrupted. Skip also if in lame duck mode.
	if !s.isRunning() || s.ldm {
		// There are some tests that create a server but don't start it,
		// and use "async" clients and perform the parsing manually. Such
		// clients would branch here (since server is not running). However,
		// when a server was really running and has been shutdown, we must
		// close this connection.
		if s.isShuttingDown() {
			conn.Close()
		}
		s.mu.Unlock()
		return c
	}

	// If there is a max connections specified, check that adding
	// this new client would not push us over the max
	if opts.MaxConn > 0 && len(s.clients) >= opts.MaxConn {
		s.mu.Unlock()
		c.maxConnExceeded()
		return nil
	}
	s.clients[c.cid] = c

	s.mu.Unlock()

	// Re-Grab lock
	c.mu.Lock()

	isClosed := c.isClosed()
	// pre holds bytes read from the connection while probing for a TLS
	// handshake. They are either replayed into the handshake or handed to
	// the read loop.
	var pre []byte
	// We need first to check for "TLS First" fallback delay.
	if !isClosed && tlsFirstFallback > 0 {
		// We wait and see if we are getting any data. Since we did not send
		// the INFO protocol yet, only clients that use TLS first should be
		// sending data (the TLS handshake). We don't really check the content:
		// if it is a rogue agent and not an actual client performing the
		// TLS handshake, the error will be detected when performing the
		// handshake on our side.
		pre = make([]byte, 4)
		c.nc.SetReadDeadline(time.Now().Add(tlsFirstFallback))
		n, _ := io.ReadFull(c.nc, pre[:])
		c.nc.SetReadDeadline(time.Time{})
		// If we get any data (regardless of possible timeout), we will proceed
		// with the TLS handshake.
		if n > 0 {
			pre = pre[:n]
		} else {
			// We did not get anything so we will send the INFO protocol.
			pre = nil

			// Restore the original info.TLSRequired value if it is
			// different than the current value and regenerate infoBytes.
			if orgInfoTLSReq != info.TLSRequired {
				info.TLSRequired = orgInfoTLSReq
				infoBytes = c.generateClientInfoJSON(info)
			}
			c.sendProtoNow(infoBytes)
			// Set the boolean to false for the rest of the function.
			tlsFirst = false
			// Check closed status again
			isClosed = c.isClosed()
		}
	}
	// If we have both TLS and non-TLS allowed we need to see which
	// one the client wants. We'll always allow this for in-process
	// connections.
	if !isClosed && !tlsFirst && opts.TLSConfig != nil && (inProcess || opts.AllowNonTLS) {
		pre = make([]byte, 4)
		c.nc.SetReadDeadline(time.Now().Add(secondsToDuration(opts.TLSTimeout)))
		n, _ := io.ReadFull(c.nc, pre[:])
		c.nc.SetReadDeadline(time.Time{})
		pre = pre[:n]
		// 0x16 is the TLS record content type of a handshake message, so a
		// first byte of 0x16 means the client is starting a TLS handshake.
		if n > 0 && pre[0] == 0x16 {
			tlsRequired = true
		} else {
			tlsRequired = false
		}
	}

	// Check for TLS
	if !isClosed && tlsRequired {
		// Reject the connection when the connection rate limiter is set and
		// does not allow another TLS handshake right now.
		if s.connRateCounter != nil && !s.connRateCounter.allow() {
			c.mu.Unlock()
			c.sendErr("Connection throttling is active. Please try again later.")
			c.closeConnection(MaxConnectionsExceeded)
			return nil
		}

		// If we have a prebuffer create a multi-reader.
		if len(pre) > 0 {
			c.nc = &tlsMixConn{c.nc, bytes.NewBuffer(pre)}
			// Clear pre so it is not parsed.
			pre = nil
		}
		// Performs server-side TLS handshake.
		if err := c.doTLSServerHandshake(_EMPTY_, opts.TLSConfig, opts.TLSTimeout, opts.TLSPinnedCerts); err != nil {
			c.mu.Unlock()
			return nil
		}
	}

	// Now, send the INFO if it was delayed
	if !isClosed && tlsFirst {
		c.flags.set(didTLSFirst)
		c.sendProtoNow(infoBytes)
		// Check closed status
		isClosed = c.isClosed()
	}

	// Connection could have been closed while sending the INFO proto.
	if isClosed {
		c.mu.Unlock()
		// We need to call closeConnection() to make sure that proper cleanup is done.
		c.closeConnection(WriteError)
		return nil
	}

	// Check for Auth. We schedule this timer after the TLS handshake to avoid
	// the race where the timer fires during the handshake and causes the
	// server to write bad data to the socket. See issue #432.
	if authRequired {
		c.setAuthTimer(secondsToDuration(opts.AuthTimeout))
	}

	// Do final client initialization

	// Set the Ping timer. Will be reset once connect was received.
	c.setPingTimer()

	// Spin up the read loop.
	// Any bytes left in pre (read while probing, but not consumed by a TLS
	// handshake) are passed to the read loop.
	s.startGoRoutine(func() { c.readLoop(pre) })

	// Spin up the write loop.
	s.startGoRoutine(func() { c.writeLoop() })

	if tlsRequired {
		c.Debugf("TLS handshake complete")
		cs := c.nc.(*tls.Conn).ConnectionState()
		c.Debugf("TLS version %s, cipher suite %s", tlsVersion(cs.Version), tlsCipher(cs.CipherSuite))
	}

	c.mu.Unlock()

	return c
}

// This will save off a closed client in a ring buffer such that
// /connz can inspect. Useful for debugging, etc.
3329 func (s *Server) saveClosedClient(c *client, nc net.Conn, reason ClosedState) { 3330 now := time.Now() 3331 3332 s.accountDisconnectEvent(c, now, reason.String()) 3333 3334 c.mu.Lock() 3335 3336 cc := &closedClient{} 3337 cc.fill(c, nc, now, false) 3338 cc.Stop = &now 3339 cc.Reason = reason.String() 3340 3341 // Do subs, do not place by default in main ConnInfo 3342 if len(c.subs) > 0 { 3343 cc.subs = make([]SubDetail, 0, len(c.subs)) 3344 for _, sub := range c.subs { 3345 cc.subs = append(cc.subs, newSubDetail(sub)) 3346 } 3347 } 3348 // Hold user as well. 3349 cc.user = c.getRawAuthUser() 3350 // Hold account name if not the global account. 3351 if c.acc != nil && c.acc.Name != globalAccountName { 3352 cc.acc = c.acc.Name 3353 } 3354 cc.JWT = c.opts.JWT 3355 cc.IssuerKey = issuerForClient(c) 3356 cc.Tags = c.tags 3357 cc.NameTag = c.nameTag 3358 c.mu.Unlock() 3359 3360 // Place in the ring buffer 3361 s.mu.Lock() 3362 if s.closed != nil { 3363 s.closed.append(cc) 3364 } 3365 s.mu.Unlock() 3366 } 3367 3368 // Adds to the list of client and websocket clients connect URLs. 3369 // If there was a change, an INFO protocol is sent to registered clients 3370 // that support async INFO protocols. 3371 // Server lock held on entry. 3372 func (s *Server) addConnectURLsAndSendINFOToClients(curls, wsurls []string) { 3373 s.updateServerINFOAndSendINFOToClients(curls, wsurls, true) 3374 } 3375 3376 // Removes from the list of client and websocket clients connect URLs. 3377 // If there was a change, an INFO protocol is sent to registered clients 3378 // that support async INFO protocols. 3379 // Server lock held on entry. 3380 func (s *Server) removeConnectURLsAndSendINFOToClients(curls, wsurls []string) { 3381 s.updateServerINFOAndSendINFOToClients(curls, wsurls, false) 3382 } 3383 3384 // Updates the list of client and websocket clients connect URLs and if any change 3385 // sends an async INFO update to clients that support it. 3386 // Server lock held on entry. 
3387 func (s *Server) updateServerINFOAndSendINFOToClients(curls, wsurls []string, add bool) { 3388 remove := !add 3389 // Will return true if we need alter the server's Info object. 3390 updateMap := func(urls []string, m refCountedUrlSet) bool { 3391 wasUpdated := false 3392 for _, url := range urls { 3393 if add && m.addUrl(url) { 3394 wasUpdated = true 3395 } else if remove && m.removeUrl(url) { 3396 wasUpdated = true 3397 } 3398 } 3399 return wasUpdated 3400 } 3401 cliUpdated := updateMap(curls, s.clientConnectURLsMap) 3402 wsUpdated := updateMap(wsurls, s.websocket.connectURLsMap) 3403 3404 updateInfo := func(infoURLs *[]string, urls []string, m refCountedUrlSet) { 3405 // Recreate the info's slice from the map 3406 *infoURLs = (*infoURLs)[:0] 3407 // Add this server client connect ULRs first... 3408 *infoURLs = append(*infoURLs, urls...) 3409 // Then the ones from the map 3410 for url := range m { 3411 *infoURLs = append(*infoURLs, url) 3412 } 3413 } 3414 if cliUpdated { 3415 updateInfo(&s.info.ClientConnectURLs, s.clientConnectURLs, s.clientConnectURLsMap) 3416 } 3417 if wsUpdated { 3418 updateInfo(&s.info.WSConnectURLs, s.websocket.connectURLs, s.websocket.connectURLsMap) 3419 } 3420 if cliUpdated || wsUpdated { 3421 // Update the time of this update 3422 s.lastCURLsUpdate = time.Now().UnixNano() 3423 // Send to all registered clients that support async INFO protocols. 3424 s.sendAsyncInfoToClients(cliUpdated, wsUpdated) 3425 } 3426 } 3427 3428 // Handle closing down a connection when the handshake has timedout. 
3429 func tlsTimeout(c *client, conn *tls.Conn) { 3430 c.mu.Lock() 3431 closed := c.isClosed() 3432 c.mu.Unlock() 3433 // Check if already closed 3434 if closed { 3435 return 3436 } 3437 cs := conn.ConnectionState() 3438 if !cs.HandshakeComplete { 3439 c.Errorf("TLS handshake timeout") 3440 c.sendErr("Secure Connection - TLS Required") 3441 c.closeConnection(TLSHandshakeError) 3442 } 3443 } 3444 3445 // Seems silly we have to write these 3446 func tlsVersion(ver uint16) string { 3447 switch ver { 3448 case tls.VersionTLS10: 3449 return "1.0" 3450 case tls.VersionTLS11: 3451 return "1.1" 3452 case tls.VersionTLS12: 3453 return "1.2" 3454 case tls.VersionTLS13: 3455 return "1.3" 3456 } 3457 return fmt.Sprintf("Unknown [0x%x]", ver) 3458 } 3459 3460 // We use hex here so we don't need multiple versions 3461 func tlsCipher(cs uint16) string { 3462 name, present := cipherMapByID[cs] 3463 if present { 3464 return name 3465 } 3466 return fmt.Sprintf("Unknown [0x%x]", cs) 3467 } 3468 3469 // Remove a client or route from our internal accounting. 
3470 func (s *Server) removeClient(c *client) { 3471 // kind is immutable, so can check without lock 3472 switch c.kind { 3473 case CLIENT: 3474 c.mu.Lock() 3475 cid := c.cid 3476 updateProtoInfoCount := false 3477 if c.kind == CLIENT && c.opts.Protocol >= ClientProtoInfo { 3478 updateProtoInfoCount = true 3479 } 3480 c.mu.Unlock() 3481 3482 s.mu.Lock() 3483 delete(s.clients, cid) 3484 if updateProtoInfoCount { 3485 s.cproto-- 3486 } 3487 s.mu.Unlock() 3488 case ROUTER: 3489 s.removeRoute(c) 3490 case GATEWAY: 3491 s.removeRemoteGatewayConnection(c) 3492 case LEAF: 3493 s.removeLeafNodeConnection(c) 3494 } 3495 } 3496 3497 func (s *Server) removeFromTempClients(cid uint64) { 3498 s.grMu.Lock() 3499 delete(s.grTmpClients, cid) 3500 s.grMu.Unlock() 3501 } 3502 3503 func (s *Server) addToTempClients(cid uint64, c *client) bool { 3504 added := false 3505 s.grMu.Lock() 3506 if s.grRunning { 3507 s.grTmpClients[cid] = c 3508 added = true 3509 } 3510 s.grMu.Unlock() 3511 return added 3512 } 3513 3514 ///////////////////////////////////////////////////////////////// 3515 // These are some helpers for accounting in functional tests. 3516 ///////////////////////////////////////////////////////////////// 3517 3518 // NumRoutes will report the number of registered routes. 3519 func (s *Server) NumRoutes() int { 3520 s.mu.RLock() 3521 defer s.mu.RUnlock() 3522 return s.numRoutes() 3523 } 3524 3525 // numRoutes will report the number of registered routes. 3526 // Server lock held on entry 3527 func (s *Server) numRoutes() int { 3528 var nr int 3529 s.forEachRoute(func(c *client) { 3530 nr++ 3531 }) 3532 return nr 3533 } 3534 3535 // NumRemotes will report number of registered remotes. 3536 func (s *Server) NumRemotes() int { 3537 s.mu.RLock() 3538 defer s.mu.RUnlock() 3539 return s.numRemotes() 3540 } 3541 3542 // numRemotes will report number of registered remotes. 
3543 // Server lock held on entry 3544 func (s *Server) numRemotes() int { 3545 return len(s.routes) 3546 } 3547 3548 // NumLeafNodes will report number of leaf node connections. 3549 func (s *Server) NumLeafNodes() int { 3550 s.mu.RLock() 3551 defer s.mu.RUnlock() 3552 return len(s.leafs) 3553 } 3554 3555 // NumClients will report the number of registered clients. 3556 func (s *Server) NumClients() int { 3557 s.mu.RLock() 3558 defer s.mu.RUnlock() 3559 return len(s.clients) 3560 } 3561 3562 // GetClient will return the client associated with cid. 3563 func (s *Server) GetClient(cid uint64) *client { 3564 return s.getClient(cid) 3565 } 3566 3567 // getClient will return the client associated with cid. 3568 func (s *Server) getClient(cid uint64) *client { 3569 s.mu.RLock() 3570 defer s.mu.RUnlock() 3571 return s.clients[cid] 3572 } 3573 3574 // GetLeafNode returns the leafnode associated with the cid. 3575 func (s *Server) GetLeafNode(cid uint64) *client { 3576 s.mu.RLock() 3577 defer s.mu.RUnlock() 3578 return s.leafs[cid] 3579 } 3580 3581 // NumSubscriptions will report how many subscriptions are active. 3582 func (s *Server) NumSubscriptions() uint32 { 3583 s.mu.RLock() 3584 defer s.mu.RUnlock() 3585 return s.numSubscriptions() 3586 } 3587 3588 // numSubscriptions will report how many subscriptions are active. 3589 // Lock should be held. 3590 func (s *Server) numSubscriptions() uint32 { 3591 var subs int 3592 s.accounts.Range(func(k, v interface{}) bool { 3593 acc := v.(*Account) 3594 subs += acc.TotalSubs() 3595 return true 3596 }) 3597 return uint32(subs) 3598 } 3599 3600 // NumSlowConsumers will report the number of slow consumers. 3601 func (s *Server) NumSlowConsumers() int64 { 3602 return atomic.LoadInt64(&s.slowConsumers) 3603 } 3604 3605 // NumSlowConsumersClients will report the number of slow consumers clients. 
3606 func (s *Server) NumSlowConsumersClients() uint64 { 3607 return s.scStats.clients.Load() 3608 } 3609 3610 // NumSlowConsumersRoutes will report the number of slow consumers routes. 3611 func (s *Server) NumSlowConsumersRoutes() uint64 { 3612 return s.scStats.routes.Load() 3613 } 3614 3615 // NumSlowConsumersGateways will report the number of slow consumers leafs. 3616 func (s *Server) NumSlowConsumersGateways() uint64 { 3617 return s.scStats.gateways.Load() 3618 } 3619 3620 // NumSlowConsumersLeafs will report the number of slow consumers leafs. 3621 func (s *Server) NumSlowConsumersLeafs() uint64 { 3622 return s.scStats.leafs.Load() 3623 } 3624 3625 // ConfigTime will report the last time the server configuration was loaded. 3626 func (s *Server) ConfigTime() time.Time { 3627 s.mu.RLock() 3628 defer s.mu.RUnlock() 3629 return s.configTime 3630 } 3631 3632 // Addr will return the net.Addr object for the current listener. 3633 func (s *Server) Addr() net.Addr { 3634 s.mu.RLock() 3635 defer s.mu.RUnlock() 3636 if s.listener == nil { 3637 return nil 3638 } 3639 return s.listener.Addr() 3640 } 3641 3642 // MonitorAddr will return the net.Addr object for the monitoring listener. 3643 func (s *Server) MonitorAddr() *net.TCPAddr { 3644 s.mu.RLock() 3645 defer s.mu.RUnlock() 3646 if s.http == nil { 3647 return nil 3648 } 3649 return s.http.Addr().(*net.TCPAddr) 3650 } 3651 3652 // ClusterAddr returns the net.Addr object for the route listener. 3653 func (s *Server) ClusterAddr() *net.TCPAddr { 3654 s.mu.RLock() 3655 defer s.mu.RUnlock() 3656 if s.routeListener == nil { 3657 return nil 3658 } 3659 return s.routeListener.Addr().(*net.TCPAddr) 3660 } 3661 3662 // ProfilerAddr returns the net.Addr object for the profiler listener. 
3663 func (s *Server) ProfilerAddr() *net.TCPAddr { 3664 s.mu.RLock() 3665 defer s.mu.RUnlock() 3666 if s.profiler == nil { 3667 return nil 3668 } 3669 return s.profiler.Addr().(*net.TCPAddr) 3670 } 3671 3672 func (s *Server) readyForConnections(d time.Duration) error { 3673 // Snapshot server options. 3674 opts := s.getOpts() 3675 3676 type info struct { 3677 ok bool 3678 err error 3679 } 3680 chk := make(map[string]info) 3681 3682 end := time.Now().Add(d) 3683 for time.Now().Before(end) { 3684 s.mu.RLock() 3685 chk["server"] = info{ok: s.listener != nil || opts.DontListen, err: s.listenerErr} 3686 chk["route"] = info{ok: (opts.Cluster.Port == 0 || s.routeListener != nil), err: s.routeListenerErr} 3687 chk["gateway"] = info{ok: (opts.Gateway.Name == _EMPTY_ || s.gatewayListener != nil), err: s.gatewayListenerErr} 3688 chk["leafnode"] = info{ok: (opts.LeafNode.Port == 0 || s.leafNodeListener != nil), err: s.leafNodeListenerErr} 3689 chk["websocket"] = info{ok: (opts.Websocket.Port == 0 || s.websocket.listener != nil), err: s.websocket.listenerErr} 3690 chk["mqtt"] = info{ok: (opts.MQTT.Port == 0 || s.mqtt.listener != nil), err: s.mqtt.listenerErr} 3691 s.mu.RUnlock() 3692 3693 var numOK int 3694 for _, inf := range chk { 3695 if inf.ok { 3696 numOK++ 3697 } 3698 } 3699 if numOK == len(chk) { 3700 // In the case of DontListen option (no accept loop), we still want 3701 // to make sure that Start() has done all the work, so we wait on 3702 // that. 
3703 if opts.DontListen { 3704 select { 3705 case <-s.startupComplete: 3706 case <-time.After(d): 3707 return fmt.Errorf("failed to be ready for connections after %s: startup did not complete", d) 3708 } 3709 } 3710 return nil 3711 } 3712 if d > 25*time.Millisecond { 3713 time.Sleep(25 * time.Millisecond) 3714 } 3715 } 3716 3717 failed := make([]string, 0, len(chk)) 3718 for name, inf := range chk { 3719 if inf.ok && inf.err != nil { 3720 failed = append(failed, fmt.Sprintf("%s(ok, but %s)", name, inf.err)) 3721 } 3722 if !inf.ok && inf.err == nil { 3723 failed = append(failed, name) 3724 } 3725 if !inf.ok && inf.err != nil { 3726 failed = append(failed, fmt.Sprintf("%s(%s)", name, inf.err)) 3727 } 3728 } 3729 3730 return fmt.Errorf( 3731 "failed to be ready for connections after %s: %s", 3732 d, strings.Join(failed, ", "), 3733 ) 3734 } 3735 3736 // ReadyForConnections returns `true` if the server is ready to accept clients 3737 // and, if routing is enabled, route connections. If after the duration 3738 // `dur` the server is still not ready, returns `false`. 3739 func (s *Server) ReadyForConnections(dur time.Duration) bool { 3740 return s.readyForConnections(dur) == nil 3741 } 3742 3743 // Quick utility to function to tell if the server supports headers. 3744 func (s *Server) supportsHeaders() bool { 3745 if s == nil { 3746 return false 3747 } 3748 return !(s.getOpts().NoHeaderSupport) 3749 } 3750 3751 // ID returns the server's ID 3752 func (s *Server) ID() string { 3753 return s.info.ID 3754 } 3755 3756 // NodeName returns the node name for this server. 3757 func (s *Server) NodeName() string { 3758 return getHash(s.info.Name) 3759 } 3760 3761 // Name returns the server's name. This will be the same as the ID if it was not set. 
3762 func (s *Server) Name() string { 3763 return s.info.Name 3764 } 3765 3766 func (s *Server) String() string { 3767 return s.info.Name 3768 } 3769 3770 type pprofLabels map[string]string 3771 3772 func setGoRoutineLabels(tags ...pprofLabels) { 3773 var labels []string 3774 for _, m := range tags { 3775 for k, v := range m { 3776 labels = append(labels, k, v) 3777 } 3778 } 3779 if len(labels) > 0 { 3780 pprof.SetGoroutineLabels( 3781 pprof.WithLabels(context.Background(), pprof.Labels(labels...)), 3782 ) 3783 } 3784 } 3785 3786 func (s *Server) startGoRoutine(f func(), tags ...pprofLabels) bool { 3787 var started bool 3788 s.grMu.Lock() 3789 defer s.grMu.Unlock() 3790 if s.grRunning { 3791 s.grWG.Add(1) 3792 go func() { 3793 setGoRoutineLabels(tags...) 3794 f() 3795 }() 3796 started = true 3797 } 3798 return started 3799 } 3800 3801 func (s *Server) numClosedConns() int { 3802 s.mu.RLock() 3803 defer s.mu.RUnlock() 3804 return s.closed.len() 3805 } 3806 3807 func (s *Server) totalClosedConns() uint64 { 3808 s.mu.RLock() 3809 defer s.mu.RUnlock() 3810 return s.closed.totalConns() 3811 } 3812 3813 func (s *Server) closedClients() []*closedClient { 3814 s.mu.RLock() 3815 defer s.mu.RUnlock() 3816 return s.closed.closedClients() 3817 } 3818 3819 // getClientConnectURLs returns suitable URLs for clients to connect to the listen 3820 // port based on the server options' Host and Port. If the Host corresponds to 3821 // "any" interfaces, this call returns the list of resolved IP addresses. 3822 // If ClientAdvertise is set, returns the client advertise host and port. 3823 // The server lock is assumed held on entry. 3824 func (s *Server) getClientConnectURLs() []string { 3825 // Snapshot server options. 3826 opts := s.getOpts() 3827 // Ignore error here since we know that if there is client advertise, the 3828 // parseHostPort is correct because we did it right before calling this 3829 // function in Server.New(). 
3830 urls, _ := s.getConnectURLs(opts.ClientAdvertise, opts.Host, opts.Port) 3831 return urls 3832 } 3833 3834 // Generic version that will return an array of URLs based on the given 3835 // advertise, host and port values. 3836 func (s *Server) getConnectURLs(advertise, host string, port int) ([]string, error) { 3837 urls := make([]string, 0, 1) 3838 3839 // short circuit if advertise is set 3840 if advertise != "" { 3841 h, p, err := parseHostPort(advertise, port) 3842 if err != nil { 3843 return nil, err 3844 } 3845 urls = append(urls, net.JoinHostPort(h, strconv.Itoa(p))) 3846 } else { 3847 sPort := strconv.Itoa(port) 3848 _, ips, err := s.getNonLocalIPsIfHostIsIPAny(host, true) 3849 for _, ip := range ips { 3850 urls = append(urls, net.JoinHostPort(ip, sPort)) 3851 } 3852 if err != nil || len(urls) == 0 { 3853 // We are here if s.opts.Host is not "0.0.0.0" nor "::", or if for some 3854 // reason we could not add any URL in the loop above. 3855 // We had a case where a Windows VM was hosed and would have err == nil 3856 // and not add any address in the array in the loop above, and we 3857 // ended-up returning 0.0.0.0, which is problematic for Windows clients. 3858 // Check for 0.0.0.0 or :: specifically, and ignore if that's the case. 3859 if host == "0.0.0.0" || host == "::" { 3860 s.Errorf("Address %q can not be resolved properly", host) 3861 } else { 3862 urls = append(urls, net.JoinHostPort(host, sPort)) 3863 } 3864 } 3865 } 3866 return urls, nil 3867 } 3868 3869 // Returns an array of non local IPs if the provided host is 3870 // 0.0.0.0 or ::. It returns the first resolved if `all` is 3871 // false. 3872 // The boolean indicate if the provided host was 0.0.0.0 (or ::) 3873 // so that if the returned array is empty caller can decide 3874 // what to do next. 
3875 func (s *Server) getNonLocalIPsIfHostIsIPAny(host string, all bool) (bool, []string, error) { 3876 ip := net.ParseIP(host) 3877 // If this is not an IP, we are done 3878 if ip == nil { 3879 return false, nil, nil 3880 } 3881 // If this is not 0.0.0.0 or :: we have nothing to do. 3882 if !ip.IsUnspecified() { 3883 return false, nil, nil 3884 } 3885 s.Debugf("Get non local IPs for %q", host) 3886 var ips []string 3887 ifaces, _ := net.Interfaces() 3888 for _, i := range ifaces { 3889 addrs, _ := i.Addrs() 3890 for _, addr := range addrs { 3891 switch v := addr.(type) { 3892 case *net.IPNet: 3893 ip = v.IP 3894 case *net.IPAddr: 3895 ip = v.IP 3896 } 3897 ipStr := ip.String() 3898 // Skip non global unicast addresses 3899 if !ip.IsGlobalUnicast() || ip.IsUnspecified() { 3900 ip = nil 3901 continue 3902 } 3903 s.Debugf(" ip=%s", ipStr) 3904 ips = append(ips, ipStr) 3905 if !all { 3906 break 3907 } 3908 } 3909 } 3910 return true, ips, nil 3911 } 3912 3913 // if the ip is not specified, attempt to resolve it 3914 func resolveHostPorts(addr net.Listener) []string { 3915 hostPorts := make([]string, 0) 3916 hp := addr.Addr().(*net.TCPAddr) 3917 port := strconv.Itoa(hp.Port) 3918 if hp.IP.IsUnspecified() { 3919 var ip net.IP 3920 ifaces, _ := net.Interfaces() 3921 for _, i := range ifaces { 3922 addrs, _ := i.Addrs() 3923 for _, addr := range addrs { 3924 switch v := addr.(type) { 3925 case *net.IPNet: 3926 ip = v.IP 3927 hostPorts = append(hostPorts, net.JoinHostPort(ip.String(), port)) 3928 case *net.IPAddr: 3929 ip = v.IP 3930 hostPorts = append(hostPorts, net.JoinHostPort(ip.String(), port)) 3931 default: 3932 continue 3933 } 3934 } 3935 } 3936 } else { 3937 hostPorts = append(hostPorts, net.JoinHostPort(hp.IP.String(), port)) 3938 } 3939 return hostPorts 3940 } 3941 3942 // format the address of a net.Listener with a protocol 3943 func formatURL(protocol string, addr net.Listener) []string { 3944 hostports := resolveHostPorts(addr) 3945 for i, hp := range hostports 
{ 3946 hostports[i] = fmt.Sprintf("%s://%s", protocol, hp) 3947 } 3948 return hostports 3949 } 3950 3951 // Ports describes URLs that the server can be contacted in 3952 type Ports struct { 3953 Nats []string `json:"nats,omitempty"` 3954 Monitoring []string `json:"monitoring,omitempty"` 3955 Cluster []string `json:"cluster,omitempty"` 3956 Profile []string `json:"profile,omitempty"` 3957 WebSocket []string `json:"websocket,omitempty"` 3958 } 3959 3960 // PortsInfo attempts to resolve all the ports. If after maxWait the ports are not 3961 // resolved, it returns nil. Otherwise it returns a Ports struct 3962 // describing ports where the server can be contacted 3963 func (s *Server) PortsInfo(maxWait time.Duration) *Ports { 3964 if s.readyForListeners(maxWait) { 3965 opts := s.getOpts() 3966 3967 s.mu.RLock() 3968 tls := s.info.TLSRequired 3969 listener := s.listener 3970 httpListener := s.http 3971 clusterListener := s.routeListener 3972 profileListener := s.profiler 3973 wsListener := s.websocket.listener 3974 wss := s.websocket.tls 3975 s.mu.RUnlock() 3976 3977 ports := Ports{} 3978 3979 if listener != nil { 3980 natsProto := "nats" 3981 if tls { 3982 natsProto = "tls" 3983 } 3984 ports.Nats = formatURL(natsProto, listener) 3985 } 3986 3987 if httpListener != nil { 3988 monProto := "http" 3989 if opts.HTTPSPort != 0 { 3990 monProto = "https" 3991 } 3992 ports.Monitoring = formatURL(monProto, httpListener) 3993 } 3994 3995 if clusterListener != nil { 3996 clusterProto := "nats" 3997 if opts.Cluster.TLSConfig != nil { 3998 clusterProto = "tls" 3999 } 4000 ports.Cluster = formatURL(clusterProto, clusterListener) 4001 } 4002 4003 if profileListener != nil { 4004 ports.Profile = formatURL("http", profileListener) 4005 } 4006 4007 if wsListener != nil { 4008 protocol := wsSchemePrefix 4009 if wss { 4010 protocol = wsSchemePrefixTLS 4011 } 4012 ports.WebSocket = formatURL(protocol, wsListener) 4013 } 4014 4015 return &ports 4016 } 4017 4018 return nil 4019 } 4020 4021 // 
Returns the portsFile. If a non-empty dirHint is provided, the dirHint 4022 // path is used instead of the server option value 4023 func (s *Server) portFile(dirHint string) string { 4024 dirname := s.getOpts().PortsFileDir 4025 if dirHint != "" { 4026 dirname = dirHint 4027 } 4028 if dirname == _EMPTY_ { 4029 return _EMPTY_ 4030 } 4031 return filepath.Join(dirname, fmt.Sprintf("%s_%d.ports", filepath.Base(os.Args[0]), os.Getpid())) 4032 } 4033 4034 // Delete the ports file. If a non-empty dirHint is provided, the dirHint 4035 // path is used instead of the server option value 4036 func (s *Server) deletePortsFile(hintDir string) { 4037 portsFile := s.portFile(hintDir) 4038 if portsFile != "" { 4039 if err := os.Remove(portsFile); err != nil { 4040 s.Errorf("Error cleaning up ports file %s: %v", portsFile, err) 4041 } 4042 } 4043 } 4044 4045 // Writes a file with a serialized Ports to the specified ports_file_dir. 4046 // The name of the file is `exename_pid.ports`, typically nats-server_pid.ports. 
4047 // if ports file is not set, this function has no effect 4048 func (s *Server) logPorts() { 4049 opts := s.getOpts() 4050 portsFile := s.portFile(opts.PortsFileDir) 4051 if portsFile != _EMPTY_ { 4052 go func() { 4053 info := s.PortsInfo(5 * time.Second) 4054 if info == nil { 4055 s.Errorf("Unable to resolve the ports in the specified time") 4056 return 4057 } 4058 data, err := json.Marshal(info) 4059 if err != nil { 4060 s.Errorf("Error marshaling ports file: %v", err) 4061 return 4062 } 4063 if err := os.WriteFile(portsFile, data, 0666); err != nil { 4064 s.Errorf("Error writing ports file (%s): %v", portsFile, err) 4065 return 4066 } 4067 4068 }() 4069 } 4070 } 4071 4072 // waits until a calculated list of listeners is resolved or a timeout 4073 func (s *Server) readyForListeners(dur time.Duration) bool { 4074 end := time.Now().Add(dur) 4075 for time.Now().Before(end) { 4076 s.mu.RLock() 4077 listeners := s.serviceListeners() 4078 s.mu.RUnlock() 4079 if len(listeners) == 0 { 4080 return false 4081 } 4082 4083 ok := true 4084 for _, l := range listeners { 4085 if l == nil { 4086 ok = false 4087 break 4088 } 4089 } 4090 if ok { 4091 return true 4092 } 4093 select { 4094 case <-s.quitCh: 4095 return false 4096 case <-time.After(25 * time.Millisecond): 4097 // continue - unable to select from quit - we are still running 4098 } 4099 } 4100 return false 4101 } 4102 4103 // returns a list of listeners that are intended for the process 4104 // if the entry is nil, the interface is yet to be resolved 4105 func (s *Server) serviceListeners() []net.Listener { 4106 listeners := make([]net.Listener, 0) 4107 opts := s.getOpts() 4108 listeners = append(listeners, s.listener) 4109 if opts.Cluster.Port != 0 { 4110 listeners = append(listeners, s.routeListener) 4111 } 4112 if opts.HTTPPort != 0 || opts.HTTPSPort != 0 { 4113 listeners = append(listeners, s.http) 4114 } 4115 if opts.ProfPort != 0 { 4116 listeners = append(listeners, s.profiler) 4117 } 4118 if 
opts.Websocket.Port != 0 { 4119 listeners = append(listeners, s.websocket.listener) 4120 } 4121 return listeners 4122 } 4123 4124 // Returns true if in lame duck mode. 4125 func (s *Server) isLameDuckMode() bool { 4126 s.mu.RLock() 4127 defer s.mu.RUnlock() 4128 return s.ldm 4129 } 4130 4131 // This function will close the client listener then close the clients 4132 // at some interval to avoid a reconnect storm. 4133 // We will also transfer any raft leaders and shutdown JetStream. 4134 func (s *Server) lameDuckMode() { 4135 s.mu.Lock() 4136 // Check if there is actually anything to do 4137 if s.isShuttingDown() || s.ldm || s.listener == nil { 4138 s.mu.Unlock() 4139 return 4140 } 4141 s.Noticef("Entering lame duck mode, stop accepting new clients") 4142 s.ldm = true 4143 s.sendLDMShutdownEventLocked() 4144 expected := 1 4145 s.listener.Close() 4146 s.listener = nil 4147 if s.websocket.server != nil { 4148 expected++ 4149 s.websocket.server.Close() 4150 s.websocket.server = nil 4151 s.websocket.listener = nil 4152 } 4153 s.ldmCh = make(chan bool, expected) 4154 opts := s.getOpts() 4155 gp := opts.LameDuckGracePeriod 4156 // For tests, we want the grace period to be in some cases bigger 4157 // than the ldm duration, so to by-pass the validateOptions() check, 4158 // we use negative number and flip it here. 4159 if gp < 0 { 4160 gp *= -1 4161 } 4162 s.mu.Unlock() 4163 4164 // If we are running any raftNodes transfer leaders. 4165 if hadTransfers := s.transferRaftLeaders(); hadTransfers { 4166 // They will transfer leadership quickly, but wait here for a second. 4167 select { 4168 case <-time.After(time.Second): 4169 case <-s.quitCh: 4170 return 4171 } 4172 } 4173 4174 // Now check and shutdown jetstream. 
4175 s.shutdownJetStream() 4176 4177 // Now shutdown the nodes 4178 s.shutdownRaftNodes() 4179 4180 // Wait for accept loops to be done to make sure that no new 4181 // client can connect 4182 for i := 0; i < expected; i++ { 4183 <-s.ldmCh 4184 } 4185 4186 s.mu.Lock() 4187 // Need to recheck few things 4188 if s.isShuttingDown() || len(s.clients) == 0 { 4189 s.mu.Unlock() 4190 // If there is no client, we need to call Shutdown() to complete 4191 // the LDMode. If server has been shutdown while lock was released, 4192 // calling Shutdown() should be no-op. 4193 s.Shutdown() 4194 return 4195 } 4196 dur := int64(opts.LameDuckDuration) 4197 dur -= int64(gp) 4198 if dur <= 0 { 4199 dur = int64(time.Second) 4200 } 4201 numClients := int64(len(s.clients)) 4202 batch := 1 4203 // Sleep interval between each client connection close. 4204 var si int64 4205 if numClients != 0 { 4206 si = dur / numClients 4207 } 4208 if si < 1 { 4209 // Should not happen (except in test with very small LD duration), but 4210 // if there are too many clients, batch the number of close and 4211 // use a tiny sleep interval that will result in yield likely. 4212 si = 1 4213 batch = int(numClients / dur) 4214 } else if si > int64(time.Second) { 4215 // Conversely, there is no need to sleep too long between clients 4216 // and spread say 10 clients for the 2min duration. Sleeping no 4217 // more than 1sec. 4218 si = int64(time.Second) 4219 } 4220 4221 // Now capture all clients 4222 clients := make([]*client, 0, len(s.clients)) 4223 for _, client := range s.clients { 4224 clients = append(clients, client) 4225 } 4226 // Now that we know that no new client can be accepted, 4227 // send INFO to routes and clients to notify this state. 
4228 s.sendLDMToRoutes() 4229 s.sendLDMToClients() 4230 s.mu.Unlock() 4231 4232 t := time.NewTimer(gp) 4233 // Delay start of closing of client connections in case 4234 // we have several servers that we want to signal to enter LD mode 4235 // and not have their client reconnect to each other. 4236 select { 4237 case <-t.C: 4238 s.Noticef("Closing existing clients") 4239 case <-s.quitCh: 4240 t.Stop() 4241 return 4242 } 4243 for i, client := range clients { 4244 client.closeConnection(ServerShutdown) 4245 if i == len(clients)-1 { 4246 break 4247 } 4248 if batch == 1 || i%batch == 0 { 4249 // We pick a random interval which will be at least si/2 4250 v := rand.Int63n(si) 4251 if v < si/2 { 4252 v = si / 2 4253 } 4254 t.Reset(time.Duration(v)) 4255 // Sleep for given interval or bail out if kicked by Shutdown(). 4256 select { 4257 case <-t.C: 4258 case <-s.quitCh: 4259 t.Stop() 4260 return 4261 } 4262 } 4263 } 4264 s.Shutdown() 4265 } 4266 4267 // Send an INFO update to routes with the indication that this server is in LDM mode. 4268 // Server lock is held on entry. 4269 func (s *Server) sendLDMToRoutes() { 4270 s.routeInfo.LameDuckMode = true 4271 infoJSON := generateInfoJSON(&s.routeInfo) 4272 s.forEachRemote(func(r *client) { 4273 r.mu.Lock() 4274 r.enqueueProto(infoJSON) 4275 r.mu.Unlock() 4276 }) 4277 // Clear now so that we notify only once, should we have to send other INFOs. 4278 s.routeInfo.LameDuckMode = false 4279 } 4280 4281 // Send an INFO update to clients with the indication that this server is in 4282 // LDM mode and with only URLs of other nodes. 4283 // Server lock is held on entry. 4284 func (s *Server) sendLDMToClients() { 4285 s.info.LameDuckMode = true 4286 // Clear this so that if there are further updates, we don't send our URLs. 4287 s.clientConnectURLs = s.clientConnectURLs[:0] 4288 if s.websocket.connectURLs != nil { 4289 s.websocket.connectURLs = s.websocket.connectURLs[:0] 4290 } 4291 // Reset content first. 
4292 s.info.ClientConnectURLs = s.info.ClientConnectURLs[:0] 4293 s.info.WSConnectURLs = s.info.WSConnectURLs[:0] 4294 // Only add the other nodes if we are allowed to. 4295 if !s.getOpts().Cluster.NoAdvertise { 4296 for url := range s.clientConnectURLsMap { 4297 s.info.ClientConnectURLs = append(s.info.ClientConnectURLs, url) 4298 } 4299 for url := range s.websocket.connectURLsMap { 4300 s.info.WSConnectURLs = append(s.info.WSConnectURLs, url) 4301 } 4302 } 4303 // Send to all registered clients that support async INFO protocols. 4304 s.sendAsyncInfoToClients(true, true) 4305 // We now clear the info.LameDuckMode flag so that if there are 4306 // cluster updates and we send the INFO, we don't have the boolean 4307 // set which would cause multiple LDM notifications to clients. 4308 s.info.LameDuckMode = false 4309 } 4310 4311 // If given error is a net.Error and is temporary, sleeps for the given 4312 // delay and double it, but cap it to ACCEPT_MAX_SLEEP. The sleep is 4313 // interrupted if the server is shutdown. 4314 // An error message is displayed depending on the type of error. 4315 // Returns the new (or unchanged) delay, or a negative value if the 4316 // server has been or is being shutdown. 4317 func (s *Server) acceptError(acceptName string, err error, tmpDelay time.Duration) time.Duration { 4318 if !s.isRunning() { 4319 return -1 4320 } 4321 //lint:ignore SA1019 We want to retry on a bunch of errors here. 
4322 if ne, ok := err.(net.Error); ok && ne.Temporary() { // nolint:staticcheck 4323 s.Errorf("Temporary %s Accept Error(%v), sleeping %dms", acceptName, ne, tmpDelay/time.Millisecond) 4324 select { 4325 case <-time.After(tmpDelay): 4326 case <-s.quitCh: 4327 return -1 4328 } 4329 tmpDelay *= 2 4330 if tmpDelay > ACCEPT_MAX_SLEEP { 4331 tmpDelay = ACCEPT_MAX_SLEEP 4332 } 4333 } else { 4334 s.Errorf("%s Accept error: %v", acceptName, err) 4335 } 4336 return tmpDelay 4337 } 4338 4339 var errNoIPAvail = errors.New("no IP available") 4340 4341 func (s *Server) getRandomIP(resolver netResolver, url string, excludedAddresses map[string]struct{}) (string, error) { 4342 host, port, err := net.SplitHostPort(url) 4343 if err != nil { 4344 return "", err 4345 } 4346 // If already an IP, skip. 4347 if net.ParseIP(host) != nil { 4348 return url, nil 4349 } 4350 ips, err := resolver.LookupHost(context.Background(), host) 4351 if err != nil { 4352 return "", fmt.Errorf("lookup for host %q: %v", host, err) 4353 } 4354 if len(excludedAddresses) > 0 { 4355 for i := 0; i < len(ips); i++ { 4356 ip := ips[i] 4357 addr := net.JoinHostPort(ip, port) 4358 if _, excluded := excludedAddresses[addr]; excluded { 4359 if len(ips) == 1 { 4360 ips = nil 4361 break 4362 } 4363 ips[i] = ips[len(ips)-1] 4364 ips = ips[:len(ips)-1] 4365 i-- 4366 } 4367 } 4368 if len(ips) == 0 { 4369 return "", errNoIPAvail 4370 } 4371 } 4372 var address string 4373 if len(ips) == 0 { 4374 s.Warnf("Unable to get IP for %s, will try with %s: %v", host, url, err) 4375 address = url 4376 } else { 4377 var ip string 4378 if len(ips) == 1 { 4379 ip = ips[0] 4380 } else { 4381 ip = ips[rand.Int31n(int32(len(ips)))] 4382 } 4383 // add the port 4384 address = net.JoinHostPort(ip, port) 4385 } 4386 return address, nil 4387 } 4388 4389 // Returns true for the first attempt and depending on the nature 4390 // of the attempt (first connect or a reconnect), when the number 4391 // of attempts is equal to the configured report 
attempts. 4392 func (s *Server) shouldReportConnectErr(firstConnect bool, attempts int) bool { 4393 opts := s.getOpts() 4394 if firstConnect { 4395 if attempts == 1 || attempts%opts.ConnectErrorReports == 0 { 4396 return true 4397 } 4398 return false 4399 } 4400 if attempts == 1 || attempts%opts.ReconnectErrorReports == 0 { 4401 return true 4402 } 4403 return false 4404 } 4405 4406 func (s *Server) updateRemoteSubscription(acc *Account, sub *subscription, delta int32) { 4407 s.updateRouteSubscriptionMap(acc, sub, delta) 4408 if s.gateway.enabled { 4409 s.gatewayUpdateSubInterest(acc.Name, sub, delta) 4410 } 4411 4412 acc.updateLeafNodes(sub, delta) 4413 } 4414 4415 func (s *Server) startRateLimitLogExpiration() { 4416 interval := time.Second 4417 s.startGoRoutine(func() { 4418 defer s.grWG.Done() 4419 4420 ticker := time.NewTicker(time.Second) 4421 defer ticker.Stop() 4422 for { 4423 select { 4424 case <-s.quitCh: 4425 return 4426 case interval = <-s.rateLimitLoggingCh: 4427 ticker.Reset(interval) 4428 case <-ticker.C: 4429 s.rateLimitLogging.Range(func(k, v interface{}) bool { 4430 start := v.(time.Time) 4431 if time.Since(start) >= interval { 4432 s.rateLimitLogging.Delete(k) 4433 } 4434 return true 4435 }) 4436 } 4437 } 4438 }) 4439 } 4440 4441 func (s *Server) changeRateLimitLogInterval(d time.Duration) { 4442 if d <= 0 { 4443 return 4444 } 4445 select { 4446 case s.rateLimitLoggingCh <- d: 4447 default: 4448 } 4449 } 4450 4451 // DisconnectClientByID disconnects a client by connection ID 4452 func (s *Server) DisconnectClientByID(id uint64) error { 4453 client := s.clients[id] 4454 if client != nil { 4455 client.closeConnection(Kicked) 4456 return nil 4457 } 4458 return errors.New("no such client id") 4459 } 4460 4461 // LDMClientByID sends a Lame Duck Mode info message to a client by connection ID 4462 func (s *Server) LDMClientByID(id uint64) error { 4463 info := s.copyInfo() 4464 info.LameDuckMode = true 4465 4466 c := s.clients[id] 4467 if c != nil { 4468 
c.mu.Lock() 4469 defer c.mu.Unlock() 4470 if c.opts.Protocol >= ClientProtoInfo && 4471 c.flags.isSet(firstPongSent) { 4472 // sendInfo takes care of checking if the connection is still 4473 // valid or not, so don't duplicate tests here. 4474 c.Debugf("sending Lame Duck Mode info to client") 4475 c.enqueueProto(c.generateClientInfoJSON(info)) 4476 return nil 4477 } else { 4478 return errors.New("ClientProtoInfo < ClientOps.Protocol or first pong not sent") 4479 } 4480 } 4481 return errors.New("no such client id") 4482 }