// Source: github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/events.go

// Copyright 2018-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
	"bytes"
	"compress/gzip"
	"crypto/sha256"
	"crypto/x509"
	"encoding/json"
	"errors"
	"fmt"
	"math/rand"
	"net/http"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/klauspost/compress/s2"

	"github.com/nats-io/jwt/v2"
	"github.com/nats-io/nats-server/v2/server/certidp"
	"github.com/nats-io/nats-server/v2/server/pse"
)

// Subjects, subject templates and token positions used by the internal
// events system. Entries containing %s are templates that callers expand
// with fmt.Sprintf (for example with a server ID or "*").
const (
	// Number of dot-separated tokens in accLookupReqSubj.
	accLookupReqTokens = 6
	accLookupReqSubj   = "$SYS.REQ.ACCOUNT.%s.CLAIMS.LOOKUP"
	accPackReqSubj     = "$SYS.REQ.CLAIMS.PACK"
	accListReqSubj     = "$SYS.REQ.CLAIMS.LIST"
	accClaimsReqSubj   = "$SYS.REQ.CLAIMS.UPDATE"
	accDeleteReqSubj   = "$SYS.REQ.CLAIMS.DELETE"

	connectEventSubj    = "$SYS.ACCOUNT.%s.CONNECT"
	disconnectEventSubj = "$SYS.ACCOUNT.%s.DISCONNECT"
	accDirectReqSubj    = "$SYS.REQ.ACCOUNT.%s.%s"
	accPingReqSubj      = "$SYS.REQ.ACCOUNT.PING.%s" // atm. only used for STATZ and CONNZ import from system account
	// kept for backward compatibility when using http resolver
	// this overlaps with the names for events but you'd have to have the operator private key in order to succeed.
	accUpdateEventSubjOld     = "$SYS.ACCOUNT.%s.CLAIMS.UPDATE"
	accUpdateEventSubjNew     = "$SYS.REQ.ACCOUNT.%s.CLAIMS.UPDATE"
	connsRespSubj             = "$SYS._INBOX_.%s"
	accConnsEventSubjNew      = "$SYS.ACCOUNT.%s.SERVER.CONNS"
	accConnsEventSubjOld      = "$SYS.SERVER.ACCOUNT.%s.CONNS" // kept for backward compatibility
	lameDuckEventSubj         = "$SYS.SERVER.%s.LAMEDUCK"
	shutdownEventSubj         = "$SYS.SERVER.%s.SHUTDOWN"
	clientKickReqSubj         = "$SYS.REQ.SERVER.%s.KICK"
	clientLDMReqSubj          = "$SYS.REQ.SERVER.%s.LDM"
	authErrorEventSubj        = "$SYS.SERVER.%s.CLIENT.AUTH.ERR"
	authErrorAccountEventSubj = "$SYS.ACCOUNT.CLIENT.AUTH.ERR"
	serverStatsSubj           = "$SYS.SERVER.%s.STATSZ"
	serverDirectReqSubj       = "$SYS.REQ.SERVER.%s.%s"
	serverPingReqSubj         = "$SYS.REQ.SERVER.PING.%s"
	serverStatsPingReqSubj    = "$SYS.REQ.SERVER.PING"             // use $SYS.REQ.SERVER.PING.STATSZ instead
	serverReloadReqSubj       = "$SYS.REQ.SERVER.%s.RELOAD"        // with server ID
	leafNodeConnectEventSubj  = "$SYS.ACCOUNT.%s.LEAFNODE.CONNECT" // for internal use only
	remoteLatencyEventSubj    = "$SYS.LATENCY.M2.%s"
	inboxRespSubj             = "$SYS._INBOX.%s.%s"

	// Used to return information to a user on bound account and user permissions.
	userDirectInfoSubj = "$SYS.REQ.USER.INFO"
	userDirectReqSubj  = "$SYS.REQ.USER.%s.INFO"

	// FIXME(dlc) - Should account scope, even with wc for now, but later on
	// we can then shard as needed.
	accNumSubsReqSubj = "$SYS.REQ.ACCOUNT.NSUBS"

	// These are for exported debug services. These are local to this server only.
	accSubsSubj = "$SYS.DEBUG.SUBSCRIBERS"

	// Token counts and zero-based token indexes that line up with the
	// templates above: shutdownEventSubj has 4 tokens with the server ID at
	// index 2; accUpdateEventSubjNew has 6 tokens; accUpdateEventSubjOld has
	// 5 tokens with the account at index 2.
	shutdownEventTokens = 4
	serverSubjectIndex  = 2
	accUpdateTokensNew  = 6
	accUpdateTokensOld  = 5
	accUpdateAccIdxOld  = 2

	// accDirectReqSubj has 5 tokens with the account at index 3.
	accReqTokens   = 5
	accReqAccIndex = 3

	ocspPeerRejectEventSubj           = "$SYS.SERVER.%s.OCSP.PEER.CONN.REJECT"
	ocspPeerChainlinkInvalidEventSubj = "$SYS.SERVER.%s.OCSP.PEER.LINK.INVALID"
)

// FIXME(dlc) - make configurable.
var eventsHBInterval = 30 * time.Second

// sysMsgHandler is the callback signature for messages that the server
// processes internally; internalReceiveLoop invokes it with the fields of a
// queued inSysMsg.
type sysMsgHandler func(sub *subscription, client *client, acc *Account, subject, reply string, hdr, msg []byte)

// Used if we have to queue things internally to avoid the route/gw path.
// Each entry carries the arguments for one deferred sysMsgHandler call (cb).
type inSysMsg struct {
	sub  *subscription
	c    *client
	acc  *Account
	subj string
	rply string
	hdr  []byte
	msg  []byte
	cb   sysMsgHandler
}

// Used to send and receive messages from inside the server.
//
// Fields whose role is visible in this file:
//   - account, client: events are treated as enabled only when both are set
//     (see eventsEnabled); client is the default publisher in internalSendLoop.
//   - seq: incremented atomically by internalSendLoop to stamp ServerInfo.Seq.
//   - servers: remote servers currently being tracked (presumably keyed by
//     remote server ID - confirm against the code that populates it).
//   - sweeper: timer that runs checkRemoteServers every chkOrph; entries
//     whose last update is older than orphMax are treated as orphaned.
//   - stmr: timer that runs heartbeatStatsz; cstatsz is the current interval
//     and is doubled until it reaches statsz.
//   - sendq, recvq: queues drained by internalSendLoop and internalReceiveLoop.
//   - resetCh: makes internalSendLoop re-read its cached server state.
//   - shash: hash of the server name, returned by Server.Node.
//   - inboxPre: inbox reply subject set up by initEventTracking.
//   - remoteStatsSub: our own subscription to the statsz broadcast subject.
type internal struct {
	account        *Account
	client         *client
	seq            uint64
	sid            int
	servers        map[string]*serverUpdate
	sweeper        *time.Timer
	stmr           *time.Timer
	replies        map[string]msgHandler
	sendq          *ipQueue[*pubMsg]
	recvq          *ipQueue[*inSysMsg]
	resetCh        chan struct{}
	wg             sync.WaitGroup
	sq             *sendq
	orphMax        time.Duration
	chkOrph        time.Duration
	statsz         time.Duration
	cstatsz        time.Duration
	shash          string
	inboxPre       string
	remoteStatsSub *subscription
}

// ServerStatsMsg is sent periodically with stats updates.
type ServerStatsMsg struct {
	Server ServerInfo  `json:"server"`
	Stats  ServerStats `json:"statsz"`
}

// ConnectEventMsg is sent when a new connection is made that is part of an account.
type ConnectEventMsg struct {
	TypedEvent
	Server ServerInfo `json:"server"`
	Client ClientInfo `json:"client"`
}

// ConnectEventMsgType is the schema type for ConnectEventMsg
const ConnectEventMsgType = "io.nats.server.advisory.v1.client_connect"

// DisconnectEventMsg is sent when a new connection previously defined from a
// ConnectEventMsg is closed.
type DisconnectEventMsg struct {
	TypedEvent
	Server   ServerInfo `json:"server"`
	Client   ClientInfo `json:"client"`
	Sent     DataStats  `json:"sent"`
	Received DataStats  `json:"received"`
	Reason   string     `json:"reason"`
}

// DisconnectEventMsgType is the schema type for DisconnectEventMsg
const DisconnectEventMsgType = "io.nats.server.advisory.v1.client_disconnect"

// OCSPPeerRejectEventMsg is sent when a peer TLS handshake is ultimately rejected due to OCSP invalidation.
// A "peer" can be an inbound client connection or a leaf connection to a remote server. Peer in event payload
// is always the peer's (TLS) leaf cert, which may or may not be the invalid cert (See also OCSPPeerChainlinkInvalidEventMsg)
type OCSPPeerRejectEventMsg struct {
	TypedEvent
	Kind   string           `json:"kind"`
	Peer   certidp.CertInfo `json:"peer"`
	Server ServerInfo       `json:"server"`
	Reason string           `json:"reason"`
}

// OCSPPeerRejectEventMsgType is the schema type for OCSPPeerRejectEventMsg
const OCSPPeerRejectEventMsgType = "io.nats.server.advisory.v1.ocsp_peer_reject"

// OCSPPeerChainlinkInvalidEventMsg is sent when a certificate (link) in a valid TLS chain is found to be OCSP invalid
// during a peer TLS handshake. A "peer" can be an inbound client connection or a leaf connection to a remote server.
// Peer and Link may be the same if the invalid cert was the peer's leaf cert
type OCSPPeerChainlinkInvalidEventMsg struct {
	TypedEvent
	Link   certidp.CertInfo `json:"link"`
	Peer   certidp.CertInfo `json:"peer"`
	Server ServerInfo       `json:"server"`
	Reason string           `json:"reason"`
}

// OCSPPeerChainlinkInvalidEventMsgType is the schema type for OCSPPeerChainlinkInvalidEventMsg
const OCSPPeerChainlinkInvalidEventMsgType = "io.nats.server.advisory.v1.ocsp_peer_link_invalid"

// AccountNumConns is an event that will be sent from a server that is tracking
// a given account when the number of connections changes. It will also HB
// updates in the absence of any changes.
type AccountNumConns struct {
	TypedEvent
	Server ServerInfo `json:"server"`
	AccountStat
}

// AccountStat contains the data common between AccountNumConns and AccountStatz
type AccountStat struct {
	Account       string    `json:"acc"`
	Name          string    `json:"name"`
	Conns         int       `json:"conns"`
	LeafNodes     int       `json:"leafnodes"`
	TotalConns    int       `json:"total_conns"`
	NumSubs       uint32    `json:"num_subscriptions"`
	Sent          DataStats `json:"sent"`
	Received      DataStats `json:"received"`
	SlowConsumers int64     `json:"slow_consumers"`
}

// AccountNumConnsMsgType is the schema type for AccountNumConns
const AccountNumConnsMsgType = "io.nats.server.advisory.v1.account_connections"

// accNumConnsReq is sent when we are starting to track an account for the first
// time. We will request others send info to us about their local state.
type accNumConnsReq struct {
	Server  ServerInfo `json:"server"`
	Account string     `json:"acc"`
}

// ServerID is basic static info for a server.
type ServerID struct {
	Name string `json:"name"`
	Host string `json:"host"`
	ID   string `json:"id"`
}

// Type for our server capabilities.
// It is a bit set: each capability is one bit, carried in ServerInfo.Flags.
type ServerCapability uint64

// ServerInfo identifies remote servers.
239 type ServerInfo struct { 240 Name string `json:"name"` 241 Host string `json:"host"` 242 ID string `json:"id"` 243 Cluster string `json:"cluster,omitempty"` 244 Domain string `json:"domain,omitempty"` 245 Version string `json:"ver"` 246 Tags []string `json:"tags,omitempty"` 247 // Whether JetStream is enabled (deprecated in favor of the `ServerCapability`). 248 JetStream bool `json:"jetstream"` 249 // Generic capability flags 250 Flags ServerCapability `json:"flags"` 251 // Sequence and Time from the remote server for this message. 252 Seq uint64 `json:"seq"` 253 Time time.Time `json:"time"` 254 } 255 256 const ( 257 JetStreamEnabled ServerCapability = 1 << iota // Server had JetStream enabled. 258 BinaryStreamSnapshot // New stream snapshot capability. 259 ) 260 261 // Set JetStream capability. 262 func (si *ServerInfo) SetJetStreamEnabled() { 263 si.Flags |= JetStreamEnabled 264 // Still set old version. 265 si.JetStream = true 266 } 267 268 // JetStreamEnabled indicates whether or not we have JetStream enabled. 269 func (si *ServerInfo) JetStreamEnabled() bool { 270 // Take into account old version. 271 return si.Flags&JetStreamEnabled != 0 || si.JetStream 272 } 273 274 // Set binary stream snapshot capability. 275 func (si *ServerInfo) SetBinaryStreamSnapshot() { 276 si.Flags |= BinaryStreamSnapshot 277 } 278 279 // JetStreamEnabled indicates whether or not we have binary stream snapshot capbilities. 280 func (si *ServerInfo) BinaryStreamSnapshot() bool { 281 return si.Flags&BinaryStreamSnapshot != 0 282 } 283 284 // ClientInfo is detailed information about the client forming a connection. 
type ClientInfo struct {
	Start      *time.Time    `json:"start,omitempty"`
	Host       string        `json:"host,omitempty"`
	ID         uint64        `json:"id,omitempty"`
	Account    string        `json:"acc,omitempty"`
	Service    string        `json:"svc,omitempty"`
	User       string        `json:"user,omitempty"`
	Name       string        `json:"name,omitempty"`
	Lang       string        `json:"lang,omitempty"`
	Version    string        `json:"ver,omitempty"`
	RTT        time.Duration `json:"rtt,omitempty"`
	Server     string        `json:"server,omitempty"`
	Cluster    string        `json:"cluster,omitempty"`
	Alternates []string      `json:"alts,omitempty"`
	Stop       *time.Time    `json:"stop,omitempty"`
	Jwt        string        `json:"jwt,omitempty"`
	IssuerKey  string        `json:"issuer_key,omitempty"`
	NameTag    string        `json:"name_tag,omitempty"`
	Tags       jwt.TagList   `json:"tags,omitempty"`
	Kind       string        `json:"kind,omitempty"`
	ClientType string        `json:"client_type,omitempty"`
	MQTTClient string        `json:"client_id,omitempty"` // This is the MQTT client ID
	Nonce      string        `json:"nonce,omitempty"`
}

// ServerStats hold various statistics that we will periodically send out.
//
// As filled in by sendStatsz in this file: CPU and Mem come from
// pse.ProcUsage, Cores from runtime.NumCPU, ActiveServers is the number of
// tracked remote servers plus one, and JetStream is only set when JetStream
// is loaded on this server.
type ServerStats struct {
	Start            time.Time      `json:"start"`
	Mem              int64          `json:"mem"`
	Cores            int            `json:"cores"`
	CPU              float64        `json:"cpu"`
	Connections      int            `json:"connections"`
	TotalConnections uint64         `json:"total_connections"`
	ActiveAccounts   int            `json:"active_accounts"`
	NumSubs          uint32         `json:"subscriptions"`
	Sent             DataStats      `json:"sent"`
	Received         DataStats      `json:"received"`
	SlowConsumers    int64          `json:"slow_consumers"`
	Routes           []*RouteStat   `json:"routes,omitempty"`
	Gateways         []*GatewayStat `json:"gateways,omitempty"`
	ActiveServers    int            `json:"active_servers,omitempty"`
	JetStream        *JetStreamVarz `json:"jetstream,omitempty"`
}

// RouteStat holds route statistics.
330 type RouteStat struct { 331 ID uint64 `json:"rid"` 332 Name string `json:"name,omitempty"` 333 Sent DataStats `json:"sent"` 334 Received DataStats `json:"received"` 335 Pending int `json:"pending"` 336 } 337 338 // GatewayStat holds gateway statistics. 339 type GatewayStat struct { 340 ID uint64 `json:"gwid"` 341 Name string `json:"name"` 342 Sent DataStats `json:"sent"` 343 Received DataStats `json:"received"` 344 NumInbound int `json:"inbound_connections"` 345 } 346 347 // DataStats reports how may msg and bytes. Applicable for both sent and received. 348 type DataStats struct { 349 Msgs int64 `json:"msgs"` 350 Bytes int64 `json:"bytes"` 351 } 352 353 // Used for internally queueing up messages that the server wants to send. 354 type pubMsg struct { 355 c *client 356 sub string 357 rply string 358 si *ServerInfo 359 hdr map[string]string 360 msg any 361 oct compressionType 362 echo bool 363 last bool 364 } 365 366 var pubMsgPool sync.Pool 367 368 func newPubMsg(c *client, sub, rply string, si *ServerInfo, hdr map[string]string, 369 msg any, oct compressionType, echo, last bool) *pubMsg { 370 371 var m *pubMsg 372 pm := pubMsgPool.Get() 373 if pm != nil { 374 m = pm.(*pubMsg) 375 } else { 376 m = &pubMsg{} 377 } 378 // When getting something from a pool it is critical that all fields are 379 // initialized. Doing this way guarantees that if someone adds a field to 380 // the structure, the compiler will fail the build if this line is not updated. 381 (*m) = pubMsg{c, sub, rply, si, hdr, msg, oct, echo, last} 382 return m 383 } 384 385 func (pm *pubMsg) returnToPool() { 386 if pm == nil { 387 return 388 } 389 pm.c, pm.sub, pm.rply, pm.si, pm.hdr, pm.msg = nil, _EMPTY_, _EMPTY_, nil, nil, nil 390 pubMsgPool.Put(pm) 391 } 392 393 // Used to track server updates. 
type serverUpdate struct {
	seq   uint64
	ltime time.Time
}

// TypedEvent is a event or advisory sent by the server that has nats type hints
// typically used for events that might be consumed by 3rd party event systems
type TypedEvent struct {
	Type string    `json:"type"`
	ID   string    `json:"id"`
	Time time.Time `json:"timestamp"`
}

// internalReceiveLoop will be responsible for dispatching all messages that
// a server receives and needs to internally process, e.g. internal subs.
// It returns immediately when there is no receive queue, and otherwise runs
// until events stop running or quitCh fires.
func (s *Server) internalReceiveLoop() {
	s.mu.RLock()
	if s.sys == nil || s.sys.recvq == nil {
		s.mu.RUnlock()
		return
	}
	recvq := s.sys.recvq
	s.mu.RUnlock()

	for s.eventsRunning() {
		select {
		case <-recvq.ch:
			// Drain everything queued so far and run each callback in order.
			msgs := recvq.pop()
			for _, m := range msgs {
				if m.cb != nil {
					m.cb(m.sub, m.c, m.acc, m.subj, m.rply, m.hdr, m.msg)
				}
			}
			recvq.recycle(&msgs)
		case <-s.quitCh:
			return
		}
	}
}

// internalSendLoop will be responsible for serializing all messages that
// a server wants to send.
//
// It snapshots the server identity under the server read lock, then drains
// s.sys.sendq. For each message it stamps the attached ServerInfo (if any),
// encodes the payload, optionally compresses it, and publishes it through the
// message's client (or the system client) via processInboundClientMsg.
// A signal on resetCh makes the loop re-read the snapshot. A message marked
// last is flushed in place and ends the loop. wg.Done is called on return.
func (s *Server) internalSendLoop(wg *sync.WaitGroup) {
	defer wg.Done()

RESET:
	s.mu.RLock()
	if s.sys == nil || s.sys.sendq == nil {
		s.mu.RUnlock()
		return
	}
	// Cache everything needed per message so the server lock is not taken
	// again inside the loop body.
	sysc := s.sys.client
	resetCh := s.sys.resetCh
	sendq := s.sys.sendq
	id := s.info.ID
	host := s.info.Host
	servername := s.info.Name
	domain := s.info.Domain
	seqp := &s.sys.seq
	js := s.info.JetStream
	cluster := s.info.Cluster
	if s.gateway.enabled {
		cluster = s.getGatewayName()
	}
	s.mu.RUnlock()

	// Grab tags.
	tags := s.getOpts().Tags

	for s.eventsRunning() {
		select {
		case <-sendq.ch:
			msgs := sendq.pop()
			for _, pm := range msgs {
				// Fill in the server identity and a fresh sequence/time.
				if si := pm.si; si != nil {
					si.Name = servername
					si.Domain = domain
					si.Host = host
					si.Cluster = cluster
					si.ID = id
					si.Seq = atomic.AddUint64(seqp, 1)
					si.Version = VERSION
					si.Time = time.Now().UTC()
					si.Tags = tags
					if js {
						// New capability based flags.
						si.SetJetStreamEnabled()
						si.SetBinaryStreamSnapshot()
					}
				}
				// Strings and byte slices are sent as is, anything else is
				// JSON encoded. A marshal error is ignored here, which
				// leaves b empty.
				var b []byte
				if pm.msg != nil {
					switch v := pm.msg.(type) {
					case string:
						b = []byte(v)
					case []byte:
						b = v
					default:
						b, _ = json.Marshal(pm.msg)
					}
				}
				// Setup our client. If the user wants to use a non-system account use our internal
				// account scoped here so that we are not changing out accounts for the system client.
				var c *client
				if pm.c != nil {
					c = pm.c
				} else {
					c = sysc
				}

				// Grab client lock.
				c.mu.Lock()

				// Prep internal structures needed to send message.
				c.pa.subject, c.pa.reply = []byte(pm.sub), []byte(pm.rply)
				c.pa.size, c.pa.szb = len(b), []byte(strconv.FormatInt(int64(len(b)), 10))
				c.pa.hdr, c.pa.hdb = -1, nil
				trace := c.trace

				// Now check for optional compression.
				var contentHeader string
				var bb bytes.Buffer

				// Empty payloads are never compressed. Write/Close errors
				// from the compressors are not checked.
				if len(b) > 0 {
					switch pm.oct {
					case gzipCompression:
						zw := gzip.NewWriter(&bb)
						zw.Write(b)
						zw.Close()
						b = bb.Bytes()
						contentHeader = "gzip"
					case snappyCompression:
						sw := s2.NewWriter(&bb, s2.WriterSnappyCompat())
						sw.Write(b)
						sw.Close()
						b = bb.Bytes()
						contentHeader = "snappy"
					case unsupportedCompression:
						contentHeader = "identity"
					}
				}
				// Optional Echo
				// Temporarily flip the client's echo setting when the
				// message asks for a different one; restored below.
				replaceEcho := c.echo != pm.echo
				if replaceEcho {
					c.echo = !c.echo
				}
				c.mu.Unlock()

				// Add in NL
				b = append(b, _CRLF_...)

				// Check if we should set content-encoding
				if contentHeader != _EMPTY_ {
					b = c.setHeader(contentEncodingHeader, contentHeader, b)
				}

				// Optional header processing.
				if pm.hdr != nil {
					for k, v := range pm.hdr {
						b = c.setHeader(k, v, b)
					}
				}
				// Tracing
				if trace {
					c.traceInOp(fmt.Sprintf("PUB %s %s %d", c.pa.subject, c.pa.reply, c.pa.size), nil)
					c.traceMsg(b)
				}

				// Process like a normal inbound msg.
				c.processInboundClientMsg(b)

				// Put echo back if needed.
				if replaceEcho {
					c.mu.Lock()
					c.echo = !c.echo
					c.mu.Unlock()
				}

				// See if we are doing graceful shutdown.
				if !pm.last {
					c.flushClients(0) // Never spend time in place.
				} else {
					// For the Shutdown event, we need to send in place otherwise
					// there is a chance that the process will exit before the
					// writeLoop has a chance to send it.
					c.flushClients(time.Second)
					// Returning here skips pm.returnToPool() for the last
					// message and any messages queued after it in this batch.
					sendq.recycle(&msgs)
					return
				}
				pm.returnToPool()
			}
			sendq.recycle(&msgs)
		case <-resetCh:
			goto RESET
		case <-s.quitCh:
			return
		}
	}
}

// Will send a shutdown message for lame-duck. Unlike sendShutdownEvent, this will
// not close off the send queue or reply handler, as we may still have a workload
// that needs migrating off.
// Lock should be held.
//
// NOTE(review): the message is queued with last=true, and internalSendLoop
// returns after sending a message marked last - confirm that stopping the
// send loop here is intended.
func (s *Server) sendLDMShutdownEventLocked() {
	if s.sys == nil || s.sys.sendq == nil {
		return
	}
	subj := fmt.Sprintf(lameDuckEventSubj, s.info.ID)
	si := &ServerInfo{}
	s.sys.sendq.push(newPubMsg(nil, subj, _EMPTY_, si, nil, si, noCompression, false, true))
}

// Will send a shutdown message.
// Takes the server write lock itself, detaches the send queue and reply
// handlers so nothing else can be queued, then queues the shutdown event as
// the last message.
func (s *Server) sendShutdownEvent() {
	s.mu.Lock()
	if s.sys == nil || s.sys.sendq == nil {
		s.mu.Unlock()
		return
	}
	subj := fmt.Sprintf(shutdownEventSubj, s.info.ID)
	sendq := s.sys.sendq
	// Stop any more messages from queueing up.
	s.sys.sendq = nil
	// Unhook all msgHandlers. Normal client cleanup will deal with subs, etc.
	s.sys.replies = nil
	// Send to the internal queue and mark as last.
	si := &ServerInfo{}
	sendq.push(newPubMsg(nil, subj, _EMPTY_, si, nil, si, noCompression, false, true))
	s.mu.Unlock()
}

// Used to send an internal message to an arbitrary account.
// Same as sendInternalAccountMsgWithReply with no reply, no headers and echo off.
func (s *Server) sendInternalAccountMsg(a *Account, subject string, msg any) error {
	return s.sendInternalAccountMsgWithReply(a, subject, _EMPTY_, nil, msg, false)
}

// Used to send an internal message with an optional reply to an arbitrary account.
// Returns ErrNoSysAccount when there is no send queue. When a is nil the
// system client is used. The account lock is taken while the server read lock
// is held.
func (s *Server) sendInternalAccountMsgWithReply(a *Account, subject, reply string, hdr map[string]string, msg any, echo bool) error {
	s.mu.RLock()
	if s.sys == nil || s.sys.sendq == nil {
		s.mu.RUnlock()
		return ErrNoSysAccount
	}
	c := s.sys.client
	// Replace our client with the account's internal client.
	if a != nil {
		a.mu.Lock()
		c = a.internalClient()
		a.mu.Unlock()
	}
	s.sys.sendq.push(newPubMsg(c, subject, reply, nil, hdr, msg, noCompression, echo, false))
	s.mu.RUnlock()
	return nil
}

// Send system style message to an account scope.
// Does nothing when there is no send queue or a is nil. Here the server lock
// is released before the account lock is taken.
func (s *Server) sendInternalAccountSysMsg(a *Account, subj string, si *ServerInfo, msg any, ct compressionType) {
	s.mu.RLock()
	if s.sys == nil || s.sys.sendq == nil || a == nil {
		s.mu.RUnlock()
		return
	}
	sendq := s.sys.sendq
	s.mu.RUnlock()

	a.mu.Lock()
	c := a.internalClient()
	a.mu.Unlock()

	sendq.push(newPubMsg(c, subj, _EMPTY_, si, nil, msg, ct, false, false))
}

// This will queue up a message to be sent.
// Lock should not be held.
// Despite the name, this variant acquires the server read lock itself.
func (s *Server) sendInternalMsgLocked(subj, rply string, si *ServerInfo, msg any) {
	s.mu.RLock()
	s.sendInternalMsg(subj, rply, si, msg)
	s.mu.RUnlock()
}

// This will queue up a message to be sent.
676 // Assumes lock is held on entry. 677 func (s *Server) sendInternalMsg(subj, rply string, si *ServerInfo, msg any) { 678 if s.sys == nil || s.sys.sendq == nil { 679 return 680 } 681 s.sys.sendq.push(newPubMsg(nil, subj, rply, si, nil, msg, noCompression, false, false)) 682 } 683 684 // Will send an api response. 685 func (s *Server) sendInternalResponse(subj string, response *ServerAPIResponse) { 686 s.mu.RLock() 687 if s.sys == nil || s.sys.sendq == nil { 688 s.mu.RUnlock() 689 return 690 } 691 s.sys.sendq.push(newPubMsg(nil, subj, _EMPTY_, response.Server, nil, response, response.compress, false, false)) 692 s.mu.RUnlock() 693 } 694 695 // Used to send internal messages from other system clients to avoid no echo issues. 696 func (c *client) sendInternalMsg(subj, rply string, si *ServerInfo, msg any) { 697 if c == nil { 698 return 699 } 700 s := c.srv 701 if s == nil { 702 return 703 } 704 s.mu.RLock() 705 if s.sys == nil || s.sys.sendq == nil { 706 s.mu.RUnlock() 707 return 708 } 709 s.sys.sendq.push(newPubMsg(c, subj, rply, si, nil, msg, noCompression, false, false)) 710 s.mu.RUnlock() 711 } 712 713 // Locked version of checking if events system running. Also checks server. 714 func (s *Server) eventsRunning() bool { 715 if s == nil { 716 return false 717 } 718 s.mu.RLock() 719 er := s.isRunning() && s.eventsEnabled() 720 s.mu.RUnlock() 721 return er 722 } 723 724 // EventsEnabled will report if the server has internal events enabled via 725 // a defined system account. 726 func (s *Server) EventsEnabled() bool { 727 s.mu.RLock() 728 defer s.mu.RUnlock() 729 return s.eventsEnabled() 730 } 731 732 // eventsEnabled will report if events are enabled. 733 // Lock should be held. 734 func (s *Server) eventsEnabled() bool { 735 return s.sys != nil && s.sys.client != nil && s.sys.account != nil 736 } 737 738 // TrackedRemoteServers returns how many remote servers we are tracking 739 // from a system events perspective. 
func (s *Server) TrackedRemoteServers() int {
	s.mu.RLock()
	defer s.mu.RUnlock()
	// -1 signals that the server is not running or events are not enabled.
	if !s.isRunning() || !s.eventsEnabled() {
		return -1
	}
	return len(s.sys.servers)
}

// Check for orphan servers who may have gone away without notification.
// This should be wrapChk() to setup common locking.
// A server is treated as orphaned when its last update is older than
// s.sys.orphMax. The sweeper timer is re-armed with s.sys.chkOrph afterwards.
func (s *Server) checkRemoteServers() {
	now := time.Now()
	for sid, su := range s.sys.servers {
		if now.Sub(su.ltime) > s.sys.orphMax {
			s.Debugf("Detected orphan remote server: %q", sid)
			// Simulate it going away.
			s.processRemoteServerShutdown(sid)
		}
	}
	if s.sys.sweeper != nil {
		s.sys.sweeper.Reset(s.sys.chkOrph)
	}
}

// Grab RSS and PCPU
// Fills v.CPU and v.Mem via pse.ProcUsage and v.Cores via runtime.NumCPU.
// This function itself does not touch the server lock.
func (s *Server) updateServerUsage(v *ServerStats) {
	var vss int64
	pse.ProcUsage(&v.CPU, &v.Mem, &vss)
	v.Cores = runtime.NumCPU()
}

// Generate a route stat for our statz update.
// Returns nil for a nil client; otherwise the values are read under the
// client lock.
func routeStat(r *client) *RouteStat {
	if r == nil {
		return nil
	}
	r.mu.Lock()
	// Note: *client.out[Msgs|Bytes] are not set using atomics,
	// unlike in[Msgs|Bytes].
	rs := &RouteStat{
		ID: r.cid,
		Sent: DataStats{
			Msgs:  r.outMsgs,
			Bytes: r.outBytes,
		},
		Received: DataStats{
			Msgs:  atomic.LoadInt64(&r.inMsgs),
			Bytes: atomic.LoadInt64(&r.inBytes),
		},
		Pending: int(r.out.pb),
	}
	if r.route != nil {
		rs.Name = r.route.remoteName
	}
	r.mu.Unlock()
	return rs
}

// Actual send method for statz updates.
// The server read lock is acquired here (and temporarily released while
// JetStream stats are gathered), so callers must not already hold the server
// lock; heartbeatStatsz runs this in its own goroutine.
func (s *Server) sendStatsz(subj string) {
	var m ServerStatsMsg
	s.updateServerUsage(&m.Stats)

	s.mu.RLock()
	defer s.mu.RUnlock()

	// Check that we have a system account, etc.
	if s.sys == nil || s.sys.account == nil {
		return
	}

	// Reports whether this server is standalone from the system account's
	// point of view, in which case local interest decides whether to send.
	shouldCheckInterest := func() bool {
		opts := s.getOpts()
		if opts.Cluster.Port != 0 || opts.Gateway.Port != 0 || opts.LeafNode.Port != 0 {
			return false
		}
		// If we are here we have no clustering or gateways and are not a leafnode hub.
		// Check for leafnode remotes that connect the system account.
		if len(opts.LeafNode.Remotes) > 0 {
			sysAcc := s.sys.account.GetName()
			for _, r := range opts.LeafNode.Remotes {
				if r.LocalAccount == sysAcc {
					return false
				}
			}
		}
		return true
	}

	// if we are running standalone, check for interest.
	if shouldCheckInterest() {
		// Check if we even have interest in this subject.
		sacc := s.sys.account
		rr := sacc.sl.Match(subj)
		totalSubs := len(rr.psubs) + len(rr.qsubs)
		if totalSubs == 0 {
			return
		} else if totalSubs == 1 && len(rr.psubs) == 1 {
			// For the broadcast subject we listen to that ourselves with no echo for remote updates.
			// If we are the only ones listening do not send either.
			if rr.psubs[0] == s.sys.remoteStatsSub {
				return
			}
		}
	}

	m.Stats.Start = s.start
	m.Stats.Connections = len(s.clients)
	m.Stats.TotalConnections = s.totalClients
	m.Stats.ActiveAccounts = int(atomic.LoadInt32(&s.activeAccounts))
	m.Stats.Received.Msgs = atomic.LoadInt64(&s.inMsgs)
	m.Stats.Received.Bytes = atomic.LoadInt64(&s.inBytes)
	m.Stats.Sent.Msgs = atomic.LoadInt64(&s.outMsgs)
	m.Stats.Sent.Bytes = atomic.LoadInt64(&s.outBytes)
	m.Stats.SlowConsumers = atomic.LoadInt64(&s.slowConsumers)
	m.Stats.NumSubs = s.numSubscriptions()
	// Routes
	s.forEachRoute(func(r *client) {
		m.Stats.Routes = append(m.Stats.Routes, routeStat(r))
	})
	// Gateways
	if s.gateway.enabled {
		gw := s.gateway
		gw.RLock()
		// One entry per outbound gateway; received counters are summed
		// over the inbound connections with the same gateway name.
		for name, c := range gw.out {
			gs := &GatewayStat{Name: name}
			c.mu.Lock()
			gs.ID = c.cid
			// Note that *client.out[Msgs|Bytes] are not set using atomic,
			// unlike the in[Msgs|bytes].
			gs.Sent = DataStats{
				Msgs:  c.outMsgs,
				Bytes: c.outBytes,
			}
			c.mu.Unlock()
			// Gather matching inbound connections
			gs.Received = DataStats{}
			for _, c := range gw.in {
				c.mu.Lock()
				if c.gw.name == name {
					gs.Received.Msgs += atomic.LoadInt64(&c.inMsgs)
					gs.Received.Bytes += atomic.LoadInt64(&c.inBytes)
					gs.NumInbound++
				}
				c.mu.Unlock()
			}
			m.Stats.Gateways = append(m.Stats.Gateways, gs)
		}
		gw.RUnlock()
	}
	// Active Servers
	// Tracked remote servers plus this one.
	m.Stats.ActiveServers = len(s.sys.servers) + 1

	// JetStream
	if js := s.js.Load(); js != nil {
		jStat := &JetStreamVarz{}
		// Release the server read lock while gathering JetStream state. It
		// is re-acquired at the end of this branch so that the deferred
		// RUnlock above stays balanced.
		s.mu.RUnlock()
		js.mu.RLock()
		// Work on a copy of the config with the store directory blanked.
		c := js.config
		c.StoreDir = _EMPTY_
		jStat.Config = &c
		js.mu.RUnlock()
		jStat.Stats = js.usageStats()
		// Update our own usage since we do not echo so we will not hear ourselves.
		ourNode := getHash(s.serverName())
		if v, ok := s.nodeToInfo.Load(ourNode); ok && v != nil {
			ni := v.(nodeInfo)
			ni.stats = jStat.Stats
			ni.cfg = jStat.Config
			s.optsMu.RLock()
			ni.tags = copyStrings(s.opts.Tags)
			s.optsMu.RUnlock()
			s.nodeToInfo.Store(ourNode, ni)
		}
		// Metagroup info.
		if mg := js.getMetaGroup(); mg != nil {
			if mg.Leader() {
				if ci := s.raftNodeToClusterInfo(mg); ci != nil {
					jStat.Meta = &MetaClusterInfo{
						Name:     ci.Name,
						Leader:   ci.Leader,
						Peer:     getHash(ci.Leader),
						Replicas: ci.Replicas,
						Size:     mg.ClusterSize(),
					}
				}
			} else {
				// non leader only include a shortened version without peers
				leader := s.serverNameForNode(mg.GroupLeader())
				jStat.Meta = &MetaClusterInfo{
					Name:   mg.Group(),
					Leader: leader,
					Peer:   getHash(leader),
					Size:   mg.ClusterSize(),
				}
			}
		}
		m.Stats.JetStream = jStat
		s.mu.RLock()
	}
	// Send message.
	s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m)
}

// Send out our statz update.
// This should be wrapChk() to setup common locking.
// Doubles the current interval (cstatsz) up to the maximum (statsz), re-arms
// the timer, and sends the update from a separate goroutine.
func (s *Server) heartbeatStatsz() {
	if s.sys.stmr != nil {
		// Increase after startup to our max.
		if s.sys.cstatsz < s.sys.statsz {
			s.sys.cstatsz *= 2
			if s.sys.cstatsz > s.sys.statsz {
				s.sys.cstatsz = s.sys.statsz
			}
		}
		s.sys.stmr.Reset(s.sys.cstatsz)
	}
	// Do in separate Go routine.
	go s.sendStatszUpdate()
}

// sendStatszUpdate sends a statsz update on this server's own statsz subject.
func (s *Server) sendStatszUpdate() {
	s.sendStatsz(fmt.Sprintf(serverStatsSubj, s.ID()))
}

// This should be wrapChk() to setup common locking.
// Arms the statsz timer with a short initial interval; heartbeatStatsz then
// backs it off.
func (s *Server) startStatszTimer() {
	// We will start by sending out more of these and trail off to the statsz being the max.
	s.sys.cstatsz = 250 * time.Millisecond
	// Send out the first one quickly, we will slowly back off.
	s.sys.stmr = time.AfterFunc(s.sys.cstatsz, s.wrapChk(s.heartbeatStatsz))
}

// Start a ticker that will fire periodically and check for orphaned servers.
// This should be wrapChk() to setup common locking.
// Implemented as a one-shot timer that checkRemoteServers re-arms after each run.
func (s *Server) startRemoteServerSweepTimer() {
	s.sys.sweeper = time.AfterFunc(s.sys.chkOrph, s.wrapChk(s.checkRemoteServers))
}

// Length of our system hash used for server targeted messages.
const sysHashLen = 8

// Computes a hash of 8 characters for the name.
func getHash(name string) string {
	return getHashSize(name, sysHashLen)
}

// Computes a hash for the given `name`. The result will be `size` characters long.
// The first `size` bytes of the SHA-256 digest of name are each mapped to a
// character of `digits` (byte value modulo `base`). The digest is 32 bytes
// long, so a size above 32 indexes out of range.
func getHashSize(name string, size int) string {
	sha := sha256.New()
	sha.Write([]byte(name))
	b := sha.Sum(nil)
	for i := 0; i < size; i++ {
		b[i] = digits[int(b[i]%base)]
	}
	return string(b[:size])
}

// Returns the node name for this server which is a hash of the server name.
// Returns the empty string when the system (s.sys) is not set up.
func (s *Server) Node() string {
	s.mu.RLock()
	defer s.mu.RUnlock()
	if s.sys != nil {
		return s.sys.shash
	}
	return _EMPTY_
}

// This will setup our system wide tracking subs.
// For now we will setup one wildcard subscription to
// monitor all accounts for changes in number of connections.
// We can make this on a per account tracking basis if needed.
// Tradeoff is subscription and interest graph events vs connect and
// disconnect events, etc.
func (s *Server) initEventTracking() {
	if !s.EventsEnabled() {
		return
	}
	// Create a system hash which we use for other servers to target us specifically.
	s.sys.shash = getHash(s.info.Name)

	// This will be for all inbox responses.
1025 subject := fmt.Sprintf(inboxRespSubj, s.sys.shash, "*") 1026 if _, err := s.sysSubscribe(subject, s.inboxReply); err != nil { 1027 s.Errorf("Error setting up internal tracking: %v", err) 1028 } 1029 s.sys.inboxPre = subject 1030 // This is for remote updates for connection accounting. 1031 subject = fmt.Sprintf(accConnsEventSubjOld, "*") 1032 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteConnsUpdate)); err != nil { 1033 s.Errorf("Error setting up internal tracking for %s: %v", subject, err) 1034 } 1035 // This will be for responses for account info that we send out. 1036 subject = fmt.Sprintf(connsRespSubj, s.info.ID) 1037 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteConnsUpdate)); err != nil { 1038 s.Errorf("Error setting up internal tracking: %v", err) 1039 } 1040 // Listen for broad requests to respond with number of subscriptions for a given subject. 1041 if _, err := s.sysSubscribe(accNumSubsReqSubj, s.noInlineCallback(s.nsubsRequest)); err != nil { 1042 s.Errorf("Error setting up internal tracking: %v", err) 1043 } 1044 // Listen for statsz from others. 1045 subject = fmt.Sprintf(serverStatsSubj, "*") 1046 if sub, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerUpdate)); err != nil { 1047 s.Errorf("Error setting up internal tracking: %v", err) 1048 } else { 1049 // Keep track of this one. 1050 s.sys.remoteStatsSub = sub 1051 } 1052 // Listen for all server shutdowns. 1053 subject = fmt.Sprintf(shutdownEventSubj, "*") 1054 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerShutdown)); err != nil { 1055 s.Errorf("Error setting up internal tracking: %v", err) 1056 } 1057 // Listen for servers entering lame-duck mode. 1058 // NOTE: This currently is handled in the same way as a server shutdown, but has 1059 // a different subject in case we need to handle differently in future. 
1060 subject = fmt.Sprintf(lameDuckEventSubj, "*") 1061 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteServerShutdown)); err != nil { 1062 s.Errorf("Error setting up internal tracking: %v", err) 1063 } 1064 // Listen for account claims updates. 1065 subscribeToUpdate := true 1066 if s.accResolver != nil { 1067 subscribeToUpdate = !s.accResolver.IsTrackingUpdate() 1068 } 1069 if subscribeToUpdate { 1070 for _, sub := range []string{accUpdateEventSubjOld, accUpdateEventSubjNew} { 1071 if _, err := s.sysSubscribe(fmt.Sprintf(sub, "*"), s.noInlineCallback(s.accountClaimUpdate)); err != nil { 1072 s.Errorf("Error setting up internal tracking: %v", err) 1073 } 1074 } 1075 } 1076 // Listen for ping messages that will be sent to all servers for statsz. 1077 // This subscription is kept for backwards compatibility. Got replaced by ...PING.STATZ from below 1078 if _, err := s.sysSubscribe(serverStatsPingReqSubj, s.noInlineCallback(s.statszReq)); err != nil { 1079 s.Errorf("Error setting up internal tracking: %v", err) 1080 } 1081 monSrvc := map[string]sysMsgHandler{ 1082 "IDZ": s.idzReq, 1083 "STATSZ": s.statszReq, 1084 "VARZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1085 optz := &VarzEventOptions{} 1086 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Varz(&optz.VarzOptions) }) 1087 }, 1088 "SUBSZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1089 optz := &SubszEventOptions{} 1090 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Subsz(&optz.SubszOptions) }) 1091 }, 1092 "CONNZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1093 optz := &ConnzEventOptions{} 1094 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Connz(&optz.ConnzOptions) }) 1095 }, 1096 "ROUTEZ": func(sub *subscription, c *client, _ 
*Account, subject, reply string, hdr, msg []byte) { 1097 optz := &RoutezEventOptions{} 1098 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Routez(&optz.RoutezOptions) }) 1099 }, 1100 "GATEWAYZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1101 optz := &GatewayzEventOptions{} 1102 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Gatewayz(&optz.GatewayzOptions) }) 1103 }, 1104 "LEAFZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1105 optz := &LeafzEventOptions{} 1106 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Leafz(&optz.LeafzOptions) }) 1107 }, 1108 "ACCOUNTZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1109 optz := &AccountzEventOptions{} 1110 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Accountz(&optz.AccountzOptions) }) 1111 }, 1112 "JSZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1113 optz := &JszEventOptions{} 1114 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.Jsz(&optz.JSzOptions) }) 1115 }, 1116 "HEALTHZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1117 optz := &HealthzEventOptions{} 1118 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.healthz(&optz.HealthzOptions), nil }) 1119 }, 1120 "PROFILEZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1121 optz := &ProfilezEventOptions{} 1122 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { return s.profilez(&optz.ProfilezOptions), nil }) 1123 }, 1124 } 1125 for name, req := range monSrvc { 1126 subject = fmt.Sprintf(serverDirectReqSubj, s.info.ID, name) 1127 if 
_, err := s.sysSubscribe(subject, s.noInlineCallback(req)); err != nil { 1128 s.Errorf("Error setting up internal tracking: %v", err) 1129 } 1130 subject = fmt.Sprintf(serverPingReqSubj, name) 1131 if _, err := s.sysSubscribe(subject, s.noInlineCallback(req)); err != nil { 1132 s.Errorf("Error setting up internal tracking: %v", err) 1133 } 1134 } 1135 extractAccount := func(subject string) (string, error) { 1136 if tk := strings.Split(subject, tsep); len(tk) != accReqTokens { 1137 return _EMPTY_, fmt.Errorf("subject %q is malformed", subject) 1138 } else { 1139 return tk[accReqAccIndex], nil 1140 } 1141 } 1142 monAccSrvc := map[string]sysMsgHandler{ 1143 "SUBSZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1144 optz := &SubszEventOptions{} 1145 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { 1146 if acc, err := extractAccount(subject); err != nil { 1147 return nil, err 1148 } else { 1149 optz.SubszOptions.Subscriptions = true 1150 optz.SubszOptions.Account = acc 1151 return s.Subsz(&optz.SubszOptions) 1152 } 1153 }) 1154 }, 1155 "CONNZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1156 optz := &ConnzEventOptions{} 1157 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { 1158 if acc, err := extractAccount(subject); err != nil { 1159 return nil, err 1160 } else { 1161 optz.ConnzOptions.Account = acc 1162 return s.Connz(&optz.ConnzOptions) 1163 } 1164 }) 1165 }, 1166 "LEAFZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1167 optz := &LeafzEventOptions{} 1168 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { 1169 if acc, err := extractAccount(subject); err != nil { 1170 return nil, err 1171 } else { 1172 optz.LeafzOptions.Account = acc 1173 return s.Leafz(&optz.LeafzOptions) 1174 } 1175 }) 1176 }, 1177 "JSZ": func(sub *subscription, c *client, _ 
*Account, subject, reply string, hdr, msg []byte) { 1178 optz := &JszEventOptions{} 1179 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { 1180 if acc, err := extractAccount(subject); err != nil { 1181 return nil, err 1182 } else { 1183 optz.Account = acc 1184 return s.JszAccount(&optz.JSzOptions) 1185 } 1186 }) 1187 }, 1188 "INFO": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1189 optz := &AccInfoEventOptions{} 1190 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { 1191 if acc, err := extractAccount(subject); err != nil { 1192 return nil, err 1193 } else { 1194 return s.accountInfo(acc) 1195 } 1196 }) 1197 }, 1198 // STATZ is essentially a duplicate of CONNS with an envelope identical to the others. 1199 // For historical reasons CONNS is the odd one out. 1200 // STATZ is also less heavy weight than INFO 1201 "STATZ": func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1202 optz := &AccountStatzEventOptions{} 1203 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { 1204 if acc, err := extractAccount(subject); err != nil { 1205 return nil, err 1206 } else if acc == "PING" { // Filter PING subject. Happens for server as well. But wildcards are not used 1207 return nil, errSkipZreq 1208 } else { 1209 optz.Accounts = []string{acc} 1210 if stz, err := s.AccountStatz(&optz.AccountStatzOptions); err != nil { 1211 return nil, err 1212 } else if len(stz.Accounts) == 0 && !optz.IncludeUnused { 1213 return nil, errSkipZreq 1214 } else { 1215 return stz, nil 1216 } 1217 } 1218 }) 1219 }, 1220 "CONNS": s.connsRequest, 1221 } 1222 for name, req := range monAccSrvc { 1223 if _, err := s.sysSubscribe(fmt.Sprintf(accDirectReqSubj, "*", name), s.noInlineCallback(req)); err != nil { 1224 s.Errorf("Error setting up internal tracking: %v", err) 1225 } 1226 } 1227 1228 // User info. 
1229 // TODO(dlc) - Can be internal and not forwarded since bound server for the client connection 1230 // is only one that will answer. This breaks tests since we still forward on remote server connect. 1231 if _, err := s.sysSubscribe(fmt.Sprintf(userDirectReqSubj, "*"), s.userInfoReq); err != nil { 1232 s.Errorf("Error setting up internal tracking: %v", err) 1233 } 1234 1235 // For now only the STATZ subject has an account specific ping equivalent. 1236 if _, err := s.sysSubscribe(fmt.Sprintf(accPingReqSubj, "STATZ"), 1237 s.noInlineCallback(func(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1238 optz := &AccountStatzEventOptions{} 1239 s.zReq(c, reply, hdr, msg, &optz.EventFilterOptions, optz, func() (any, error) { 1240 if stz, err := s.AccountStatz(&optz.AccountStatzOptions); err != nil { 1241 return nil, err 1242 } else if len(stz.Accounts) == 0 && !optz.IncludeUnused { 1243 return nil, errSkipZreq 1244 } else { 1245 return stz, nil 1246 } 1247 }) 1248 })); err != nil { 1249 s.Errorf("Error setting up internal tracking: %v", err) 1250 } 1251 1252 // Listen for updates when leaf nodes connect for a given account. This will 1253 // force any gateway connections to move to `modeInterestOnly` 1254 subject = fmt.Sprintf(leafNodeConnectEventSubj, "*") 1255 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.leafNodeConnected)); err != nil { 1256 s.Errorf("Error setting up internal tracking: %v", err) 1257 } 1258 // For tracking remote latency measurements. 1259 subject = fmt.Sprintf(remoteLatencyEventSubj, s.sys.shash) 1260 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.remoteLatencyUpdate)); err != nil { 1261 s.Errorf("Error setting up internal latency tracking: %v", err) 1262 } 1263 // This is for simple debugging of number of subscribers that exist in the system. 
1264 if _, err := s.sysSubscribeInternal(accSubsSubj, s.noInlineCallback(s.debugSubscribers)); err != nil { 1265 s.Errorf("Error setting up internal debug service for subscribers: %v", err) 1266 } 1267 1268 // Listen for requests to reload the server configuration. 1269 subject = fmt.Sprintf(serverReloadReqSubj, s.info.ID) 1270 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.reloadConfig)); err != nil { 1271 s.Errorf("Error setting up server reload handler: %v", err) 1272 } 1273 1274 // Client connection kick 1275 subject = fmt.Sprintf(clientKickReqSubj, s.info.ID) 1276 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.kickClient)); err != nil { 1277 s.Errorf("Error setting up client kick service: %v", err) 1278 } 1279 // Client connection LDM 1280 subject = fmt.Sprintf(clientLDMReqSubj, s.info.ID) 1281 if _, err := s.sysSubscribe(subject, s.noInlineCallback(s.ldmClient)); err != nil { 1282 s.Errorf("Error setting up client LDM service: %v", err) 1283 } 1284 } 1285 1286 // UserInfo returns basic information to a user about bound account and user permissions. 1287 // For account information they will need to ping that separately, and this allows security 1288 // controls on each subsystem if desired, e.g. account info, jetstream account info, etc. 1289 type UserInfo struct { 1290 UserID string `json:"user"` 1291 Account string `json:"account"` 1292 Permissions *Permissions `json:"permissions,omitempty"` 1293 Expires time.Duration `json:"expires,omitempty"` 1294 } 1295 1296 // Process a user info request. 
1297 func (s *Server) userInfoReq(sub *subscription, c *client, _ *Account, subject, reply string, msg []byte) { 1298 if !s.EventsEnabled() || reply == _EMPTY_ { 1299 return 1300 } 1301 1302 response := &ServerAPIResponse{Server: &ServerInfo{}} 1303 1304 ci, _, _, _, err := s.getRequestInfo(c, msg) 1305 if err != nil { 1306 response.Error = &ApiError{Code: http.StatusBadRequest} 1307 s.sendInternalResponse(reply, response) 1308 return 1309 } 1310 1311 response.Data = &UserInfo{ 1312 UserID: ci.User, 1313 Account: ci.Account, 1314 Permissions: c.publicPermissions(), 1315 Expires: c.claimExpiration(), 1316 } 1317 s.sendInternalResponse(reply, response) 1318 } 1319 1320 // register existing accounts with any system exports. 1321 func (s *Server) registerSystemImportsForExisting() { 1322 var accounts []*Account 1323 1324 s.mu.RLock() 1325 if s.sys == nil { 1326 s.mu.RUnlock() 1327 return 1328 } 1329 sacc := s.sys.account 1330 s.accounts.Range(func(k, v any) bool { 1331 a := v.(*Account) 1332 if a != sacc { 1333 accounts = append(accounts, a) 1334 } 1335 return true 1336 }) 1337 s.mu.RUnlock() 1338 1339 for _, a := range accounts { 1340 s.registerSystemImports(a) 1341 } 1342 } 1343 1344 // add all exports a system account will need 1345 func (s *Server) addSystemAccountExports(sacc *Account) { 1346 if !s.EventsEnabled() { 1347 return 1348 } 1349 accConnzSubj := fmt.Sprintf(accDirectReqSubj, "*", "CONNZ") 1350 // prioritize not automatically added exports 1351 if !sacc.hasServiceExportMatching(accConnzSubj) { 1352 // pick export type that clamps importing account id into subject 1353 if err := sacc.addServiceExportWithResponseAndAccountPos(accConnzSubj, Streamed, nil, 4); err != nil { 1354 //if err := sacc.AddServiceExportWithResponse(accConnzSubj, Streamed, nil); err != nil { 1355 s.Errorf("Error adding system service export for %q: %v", accConnzSubj, err) 1356 } 1357 } 1358 // prioritize not automatically added exports 1359 accStatzSubj := fmt.Sprintf(accDirectReqSubj, 
"*", "STATZ") 1360 if !sacc.hasServiceExportMatching(accStatzSubj) { 1361 // pick export type that clamps importing account id into subject 1362 if err := sacc.addServiceExportWithResponseAndAccountPos(accStatzSubj, Streamed, nil, 4); err != nil { 1363 s.Errorf("Error adding system service export for %q: %v", accStatzSubj, err) 1364 } 1365 } 1366 // FIXME(dlc) - Old experiment, Remove? 1367 if !sacc.hasServiceExportMatching(accSubsSubj) { 1368 if err := sacc.AddServiceExport(accSubsSubj, nil); err != nil { 1369 s.Errorf("Error adding system service export for %q: %v", accSubsSubj, err) 1370 } 1371 } 1372 1373 // User info export. 1374 userInfoSubj := fmt.Sprintf(userDirectReqSubj, "*") 1375 if !sacc.hasServiceExportMatching(userInfoSubj) { 1376 if err := sacc.AddServiceExport(userInfoSubj, nil); err != nil { 1377 s.Errorf("Error adding system service export for %q: %v", userInfoSubj, err) 1378 } 1379 mappedSubj := fmt.Sprintf(userDirectReqSubj, sacc.GetName()) 1380 if err := sacc.AddServiceImport(sacc, userDirectInfoSubj, mappedSubj); err != nil { 1381 s.Errorf("Error setting up system service import %s: %v", mappedSubj, err) 1382 } 1383 // Make sure to share details. 1384 sacc.setServiceImportSharing(sacc, mappedSubj, false, true) 1385 } 1386 1387 // Register any accounts that existed prior. 1388 s.registerSystemImportsForExisting() 1389 1390 // in case of a mixed mode setup, enable js exports anyway 1391 if s.JetStreamEnabled() || !s.standAloneMode() { 1392 s.checkJetStreamExports() 1393 } 1394 } 1395 1396 // accountClaimUpdate will receive claim updates for accounts. 
1397 func (s *Server) accountClaimUpdate(sub *subscription, c *client, _ *Account, subject, resp string, hdr, msg []byte) { 1398 if !s.EventsEnabled() { 1399 return 1400 } 1401 var pubKey string 1402 toks := strings.Split(subject, tsep) 1403 if len(toks) == accUpdateTokensNew { 1404 pubKey = toks[accReqAccIndex] 1405 } else if len(toks) == accUpdateTokensOld { 1406 pubKey = toks[accUpdateAccIdxOld] 1407 } else { 1408 s.Debugf("Received account claims update on bad subject %q", subject) 1409 return 1410 } 1411 if len(msg) == 0 { 1412 err := errors.New("request body is empty") 1413 respondToUpdate(s, resp, pubKey, "jwt update error", err) 1414 } else if claim, err := jwt.DecodeAccountClaims(string(msg)); err != nil { 1415 respondToUpdate(s, resp, pubKey, "jwt update resulted in error", err) 1416 } else if claim.Subject != pubKey { 1417 err := errors.New("subject does not match jwt content") 1418 respondToUpdate(s, resp, pubKey, "jwt update resulted in error", err) 1419 } else if v, ok := s.accounts.Load(pubKey); !ok { 1420 respondToUpdate(s, resp, pubKey, "jwt update skipped", nil) 1421 } else if err := s.updateAccountWithClaimJWT(v.(*Account), string(msg)); err != nil { 1422 respondToUpdate(s, resp, pubKey, "jwt update resulted in error", err) 1423 } else { 1424 respondToUpdate(s, resp, pubKey, "jwt updated", nil) 1425 } 1426 } 1427 1428 // processRemoteServerShutdown will update any affected accounts. 1429 // Will update the remote count for clients. 1430 // Lock assume held. 1431 func (s *Server) processRemoteServerShutdown(sid string) { 1432 s.accounts.Range(func(k, v any) bool { 1433 v.(*Account).removeRemoteServer(sid) 1434 return true 1435 }) 1436 // Update any state in nodeInfo. 
1437 s.nodeToInfo.Range(func(k, v any) bool { 1438 ni := v.(nodeInfo) 1439 if ni.id == sid { 1440 ni.offline = true 1441 s.nodeToInfo.Store(k, ni) 1442 return false 1443 } 1444 return true 1445 }) 1446 delete(s.sys.servers, sid) 1447 } 1448 1449 func (s *Server) sameDomain(domain string) bool { 1450 return domain == _EMPTY_ || s.info.Domain == _EMPTY_ || domain == s.info.Domain 1451 } 1452 1453 // remoteServerShutdown is called when we get an event from another server shutting down. 1454 func (s *Server) remoteServerShutdown(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1455 s.mu.Lock() 1456 defer s.mu.Unlock() 1457 if !s.eventsEnabled() { 1458 return 1459 } 1460 toks := strings.Split(subject, tsep) 1461 if len(toks) < shutdownEventTokens { 1462 s.Debugf("Received remote server shutdown on bad subject %q", subject) 1463 return 1464 } 1465 1466 if len(msg) == 0 { 1467 s.Errorf("Remote server sent invalid (empty) shutdown message to %q", subject) 1468 return 1469 } 1470 1471 // We have an optional serverInfo here, remove from nodeToX lookups. 1472 var si ServerInfo 1473 if err := json.Unmarshal(msg, &si); err != nil { 1474 s.Debugf("Received bad server info for remote server shutdown") 1475 return 1476 } 1477 1478 // JetStream node updates if applicable. 1479 node := getHash(si.Name) 1480 if v, ok := s.nodeToInfo.Load(node); ok && v != nil { 1481 ni := v.(nodeInfo) 1482 ni.offline = true 1483 s.nodeToInfo.Store(node, ni) 1484 } 1485 1486 sid := toks[serverSubjectIndex] 1487 if su := s.sys.servers[sid]; su != nil { 1488 s.processRemoteServerShutdown(sid) 1489 } 1490 } 1491 1492 // remoteServerUpdate listens for statsz updates from other servers. 
1493 func (s *Server) remoteServerUpdate(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1494 var ssm ServerStatsMsg 1495 if len(msg) == 0 { 1496 s.Debugf("Received empty server info for remote server update") 1497 return 1498 } else if err := json.Unmarshal(msg, &ssm); err != nil { 1499 s.Debugf("Received bad server info for remote server update") 1500 return 1501 } 1502 si := ssm.Server 1503 1504 // Should do normal updates before bailing if wrong domain. 1505 s.mu.Lock() 1506 if s.isRunning() && s.eventsEnabled() && ssm.Server.ID != s.info.ID { 1507 s.updateRemoteServer(&si) 1508 } 1509 s.mu.Unlock() 1510 1511 // JetStream node updates. 1512 if !s.sameDomain(si.Domain) { 1513 return 1514 } 1515 1516 var cfg *JetStreamConfig 1517 var stats *JetStreamStats 1518 1519 if ssm.Stats.JetStream != nil { 1520 cfg = ssm.Stats.JetStream.Config 1521 stats = ssm.Stats.JetStream.Stats 1522 } 1523 1524 node := getHash(si.Name) 1525 s.nodeToInfo.Store(node, nodeInfo{ 1526 si.Name, 1527 si.Version, 1528 si.Cluster, 1529 si.Domain, 1530 si.ID, 1531 si.Tags, 1532 cfg, 1533 stats, 1534 false, 1535 si.JetStreamEnabled(), 1536 si.BinaryStreamSnapshot(), 1537 }) 1538 } 1539 1540 // updateRemoteServer is called when we have an update from a remote server. 1541 // This allows us to track remote servers, respond to shutdown messages properly, 1542 // make sure that messages are ordered, and allow us to prune dead servers. 1543 // Lock should be held upon entry. 1544 func (s *Server) updateRemoteServer(si *ServerInfo) { 1545 su := s.sys.servers[si.ID] 1546 if su == nil { 1547 s.sys.servers[si.ID] = &serverUpdate{si.Seq, time.Now()} 1548 s.processNewServer(si) 1549 } else { 1550 // Should always be going up. 
1551 if si.Seq <= su.seq { 1552 s.Errorf("Received out of order remote server update from: %q", si.ID) 1553 return 1554 } 1555 su.seq = si.Seq 1556 su.ltime = time.Now() 1557 } 1558 } 1559 1560 // processNewServer will hold any logic we want to use when we discover a new server. 1561 // Lock should be held upon entry. 1562 func (s *Server) processNewServer(si *ServerInfo) { 1563 // Right now we only check if we have leafnode servers and if so send another 1564 // connect update to make sure they switch this account to interest only mode. 1565 s.ensureGWsInterestOnlyForLeafNodes() 1566 1567 // Add to our nodeToName 1568 if s.sameDomain(si.Domain) { 1569 node := getHash(si.Name) 1570 // Only update if non-existent 1571 if _, ok := s.nodeToInfo.Load(node); !ok { 1572 s.nodeToInfo.Store(node, nodeInfo{ 1573 si.Name, 1574 si.Version, 1575 si.Cluster, 1576 si.Domain, 1577 si.ID, 1578 si.Tags, 1579 nil, 1580 nil, 1581 false, 1582 si.JetStreamEnabled(), 1583 si.BinaryStreamSnapshot(), 1584 }) 1585 } 1586 } 1587 // Announce ourselves.. 1588 // Do this in a separate Go routine. 1589 go s.sendStatszUpdate() 1590 } 1591 1592 // If GW is enabled on this server and there are any leaf node connections, 1593 // this function will send a LeafNode connect system event to the super cluster 1594 // to ensure that the GWs are in interest-only mode for this account. 1595 // Lock should be held upon entry. 1596 // TODO(dlc) - this will cause this account to be loaded on all servers. Need a better 1597 // way with GW2. 1598 func (s *Server) ensureGWsInterestOnlyForLeafNodes() { 1599 if !s.gateway.enabled || len(s.leafs) == 0 { 1600 return 1601 } 1602 sent := make(map[*Account]bool, len(s.leafs)) 1603 for _, c := range s.leafs { 1604 if !sent[c.acc] { 1605 s.sendLeafNodeConnectMsg(c.acc.Name) 1606 sent[c.acc] = true 1607 } 1608 } 1609 } 1610 1611 // shutdownEventing will clean up all eventing state. 
1612 func (s *Server) shutdownEventing() { 1613 if !s.eventsRunning() { 1614 return 1615 } 1616 1617 s.mu.Lock() 1618 clearTimer(&s.sys.sweeper) 1619 clearTimer(&s.sys.stmr) 1620 rc := s.sys.resetCh 1621 s.sys.resetCh = nil 1622 wg := &s.sys.wg 1623 s.mu.Unlock() 1624 1625 // We will queue up a shutdown event and wait for the 1626 // internal send loop to exit. 1627 s.sendShutdownEvent() 1628 wg.Wait() 1629 1630 s.mu.Lock() 1631 defer s.mu.Unlock() 1632 1633 // Whip through all accounts. 1634 s.accounts.Range(func(k, v any) bool { 1635 v.(*Account).clearEventing() 1636 return true 1637 }) 1638 // Turn everything off here. 1639 s.sys = nil 1640 // Make sure this is done after s.sys = nil, so that we don't 1641 // get sends to closed channels on badly-timed config reloads. 1642 close(rc) 1643 } 1644 1645 // Request for our local connection count. 1646 func (s *Server) connsRequest(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1647 if !s.eventsRunning() { 1648 return 1649 } 1650 tk := strings.Split(subject, tsep) 1651 if len(tk) != accReqTokens { 1652 s.sys.client.Errorf("Bad subject account connections request message") 1653 return 1654 } 1655 a := tk[accReqAccIndex] 1656 m := accNumConnsReq{Account: a} 1657 if len(msg) > 0 { 1658 if err := json.Unmarshal(msg, &m); err != nil { 1659 s.sys.client.Errorf("Error unmarshalling account connections request message: %v", err) 1660 return 1661 } 1662 } 1663 if m.Account != a { 1664 s.sys.client.Errorf("Error unmarshalled account does not match subject") 1665 return 1666 } 1667 // Here we really only want to lookup the account if its local. We do not want to fetch this 1668 // account if we have no interest in it. 1669 var acc *Account 1670 if v, ok := s.accounts.Load(m.Account); ok { 1671 acc = v.(*Account) 1672 } 1673 if acc == nil { 1674 return 1675 } 1676 // We know this is a local connection. 
1677 if nlc := acc.NumLocalConnections(); nlc > 0 { 1678 s.mu.Lock() 1679 s.sendAccConnsUpdate(acc, reply) 1680 s.mu.Unlock() 1681 } 1682 } 1683 1684 // leafNodeConnected is an event we will receive when a leaf node for a given account connects. 1685 func (s *Server) leafNodeConnected(sub *subscription, _ *client, _ *Account, subject, reply string, hdr, msg []byte) { 1686 m := accNumConnsReq{} 1687 if err := json.Unmarshal(msg, &m); err != nil { 1688 s.sys.client.Errorf("Error unmarshalling account connections request message: %v", err) 1689 return 1690 } 1691 1692 s.mu.RLock() 1693 na := m.Account == _EMPTY_ || !s.eventsEnabled() || !s.gateway.enabled 1694 s.mu.RUnlock() 1695 1696 if na { 1697 return 1698 } 1699 1700 if acc, _ := s.lookupAccount(m.Account); acc != nil { 1701 s.switchAccountToInterestMode(acc.Name) 1702 } 1703 } 1704 1705 // Common filter options for system requests STATSZ VARZ SUBSZ CONNZ ROUTEZ GATEWAYZ LEAFZ 1706 type EventFilterOptions struct { 1707 Name string `json:"server_name,omitempty"` // filter by server name 1708 Cluster string `json:"cluster,omitempty"` // filter by cluster name 1709 Host string `json:"host,omitempty"` // filter by host name 1710 Tags []string `json:"tags,omitempty"` // filter by tags (must match all tags) 1711 Domain string `json:"domain,omitempty"` // filter by JS domain 1712 } 1713 1714 // StatszEventOptions are options passed to Statsz 1715 type StatszEventOptions struct { 1716 // No actual options yet 1717 EventFilterOptions 1718 } 1719 1720 // Options for account Info 1721 type AccInfoEventOptions struct { 1722 // No actual options yet 1723 EventFilterOptions 1724 } 1725 1726 // In the context of system events, ConnzEventOptions are options passed to Connz 1727 type ConnzEventOptions struct { 1728 ConnzOptions 1729 EventFilterOptions 1730 } 1731 1732 // In the context of system events, RoutezEventOptions are options passed to Routez 1733 type RoutezEventOptions struct { 1734 RoutezOptions 1735 EventFilterOptions 
1736 } 1737 1738 // In the context of system events, SubzEventOptions are options passed to Subz 1739 type SubszEventOptions struct { 1740 SubszOptions 1741 EventFilterOptions 1742 } 1743 1744 // In the context of system events, VarzEventOptions are options passed to Varz 1745 type VarzEventOptions struct { 1746 VarzOptions 1747 EventFilterOptions 1748 } 1749 1750 // In the context of system events, GatewayzEventOptions are options passed to Gatewayz 1751 type GatewayzEventOptions struct { 1752 GatewayzOptions 1753 EventFilterOptions 1754 } 1755 1756 // In the context of system events, LeafzEventOptions are options passed to Leafz 1757 type LeafzEventOptions struct { 1758 LeafzOptions 1759 EventFilterOptions 1760 } 1761 1762 // In the context of system events, AccountzEventOptions are options passed to Accountz 1763 type AccountzEventOptions struct { 1764 AccountzOptions 1765 EventFilterOptions 1766 } 1767 1768 // In the context of system events, AccountzEventOptions are options passed to Accountz 1769 type AccountStatzEventOptions struct { 1770 AccountStatzOptions 1771 EventFilterOptions 1772 } 1773 1774 // In the context of system events, JszEventOptions are options passed to Jsz 1775 type JszEventOptions struct { 1776 JSzOptions 1777 EventFilterOptions 1778 } 1779 1780 // In the context of system events, HealthzEventOptions are options passed to Healthz 1781 type HealthzEventOptions struct { 1782 HealthzOptions 1783 EventFilterOptions 1784 } 1785 1786 // In the context of system events, ProfilezEventOptions are options passed to Profilez 1787 type ProfilezEventOptions struct { 1788 ProfilezOptions 1789 EventFilterOptions 1790 } 1791 1792 // returns true if the request does NOT apply to this server and can be ignored. 
1793 // DO NOT hold the server lock when 1794 func (s *Server) filterRequest(fOpts *EventFilterOptions) bool { 1795 if fOpts.Name != _EMPTY_ && !strings.Contains(s.info.Name, fOpts.Name) { 1796 return true 1797 } 1798 if fOpts.Host != _EMPTY_ && !strings.Contains(s.info.Host, fOpts.Host) { 1799 return true 1800 } 1801 if fOpts.Cluster != _EMPTY_ { 1802 if !strings.Contains(s.ClusterName(), fOpts.Cluster) { 1803 return true 1804 } 1805 } 1806 if len(fOpts.Tags) > 0 { 1807 opts := s.getOpts() 1808 for _, t := range fOpts.Tags { 1809 if !opts.Tags.Contains(t) { 1810 return true 1811 } 1812 } 1813 } 1814 if fOpts.Domain != _EMPTY_ && s.getOpts().JetStreamDomain != fOpts.Domain { 1815 return true 1816 } 1817 return false 1818 } 1819 1820 // Encoding support (compression) 1821 type compressionType int8 1822 1823 const ( 1824 noCompression = compressionType(iota) 1825 gzipCompression 1826 snappyCompression 1827 unsupportedCompression 1828 ) 1829 1830 // ServerAPIResponse is the response type for the server API like varz, connz etc. 1831 type ServerAPIResponse struct { 1832 Server *ServerInfo `json:"server"` 1833 Data any `json:"data,omitempty"` 1834 Error *ApiError `json:"error,omitempty"` 1835 1836 // Private to indicate compression if any. 1837 compress compressionType 1838 } 1839 1840 // Specialized response types for unmarshalling. 1841 1842 // ServerAPIConnzResponse is the response type connz 1843 type ServerAPIConnzResponse struct { 1844 Server *ServerInfo `json:"server"` 1845 Data *Connz `json:"data,omitempty"` 1846 Error *ApiError `json:"error,omitempty"` 1847 } 1848 1849 // statszReq is a request for us to respond with current statsz. 1850 func (s *Server) statszReq(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1851 if !s.EventsEnabled() { 1852 return 1853 } 1854 1855 // No reply is a signal that we should use our normal broadcast subject. 
1856 if reply == _EMPTY_ { 1857 reply = fmt.Sprintf(serverStatsSubj, s.info.ID) 1858 } 1859 1860 opts := StatszEventOptions{} 1861 if len(msg) != 0 { 1862 if err := json.Unmarshal(msg, &opts); err != nil { 1863 response := &ServerAPIResponse{ 1864 Server: &ServerInfo{}, 1865 Error: &ApiError{Code: http.StatusBadRequest, Description: err.Error()}, 1866 } 1867 s.sendInternalMsgLocked(reply, _EMPTY_, response.Server, response) 1868 return 1869 } else if ignore := s.filterRequest(&opts.EventFilterOptions); ignore { 1870 return 1871 } 1872 } 1873 s.sendStatsz(reply) 1874 } 1875 1876 // idzReq is for a request for basic static server info. 1877 // Try to not hold the write lock or dynamically create data. 1878 func (s *Server) idzReq(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1879 s.mu.RLock() 1880 defer s.mu.RUnlock() 1881 id := &ServerID{ 1882 Name: s.info.Name, 1883 Host: s.info.Host, 1884 ID: s.info.ID, 1885 } 1886 s.sendInternalMsg(reply, _EMPTY_, nil, &id) 1887 } 1888 1889 var errSkipZreq = errors.New("filtered response") 1890 1891 const ( 1892 acceptEncodingHeader = "Accept-Encoding" 1893 contentEncodingHeader = "Content-Encoding" 1894 ) 1895 1896 // This is not as formal as it could be. We see if anything has s2 or snappy first, then gzip. 
1897 func getAcceptEncoding(hdr []byte) compressionType { 1898 ae := strings.ToLower(string(getHeader(acceptEncodingHeader, hdr))) 1899 if ae == _EMPTY_ { 1900 return noCompression 1901 } 1902 if strings.Contains(ae, "snappy") || strings.Contains(ae, "s2") { 1903 return snappyCompression 1904 } 1905 if strings.Contains(ae, "gzip") { 1906 return gzipCompression 1907 } 1908 return unsupportedCompression 1909 } 1910 1911 func (s *Server) zReq(_ *client, reply string, hdr, msg []byte, fOpts *EventFilterOptions, optz any, respf func() (any, error)) { 1912 if !s.EventsEnabled() || reply == _EMPTY_ { 1913 return 1914 } 1915 response := &ServerAPIResponse{Server: &ServerInfo{}} 1916 var err error 1917 status := 0 1918 if len(msg) != 0 { 1919 if err = json.Unmarshal(msg, optz); err != nil { 1920 status = http.StatusBadRequest // status is only included on error, so record how far execution got 1921 } else if s.filterRequest(fOpts) { 1922 return 1923 } 1924 } 1925 if err == nil { 1926 response.Data, err = respf() 1927 if errors.Is(err, errSkipZreq) { 1928 return 1929 } else if err != nil { 1930 status = http.StatusInternalServerError 1931 } 1932 } 1933 if err != nil { 1934 response.Error = &ApiError{Code: status, Description: err.Error()} 1935 } else if len(hdr) > 0 { 1936 response.compress = getAcceptEncoding(hdr) 1937 } 1938 s.sendInternalResponse(reply, response) 1939 } 1940 1941 // remoteConnsUpdate gets called when we receive a remote update from another server. 
1942 func (s *Server) remoteConnsUpdate(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 1943 if !s.eventsRunning() { 1944 return 1945 } 1946 var m AccountNumConns 1947 if len(msg) == 0 { 1948 s.sys.client.Errorf("No message body provided") 1949 return 1950 } else if err := json.Unmarshal(msg, &m); err != nil { 1951 s.sys.client.Errorf("Error unmarshalling account connection event message: %v", err) 1952 return 1953 } 1954 1955 // See if we have the account registered, if not drop it. 1956 // Make sure this does not force us to load this account here. 1957 var acc *Account 1958 if v, ok := s.accounts.Load(m.Account); ok { 1959 acc = v.(*Account) 1960 } 1961 // Silently ignore these if we do not have local interest in the account. 1962 if acc == nil { 1963 return 1964 } 1965 1966 s.mu.Lock() 1967 1968 // check again here if we have been shutdown. 1969 if !s.isRunning() || !s.eventsEnabled() { 1970 s.mu.Unlock() 1971 return 1972 } 1973 // Double check that this is not us, should never happen, so error if it does. 1974 if m.Server.ID == s.info.ID { 1975 s.sys.client.Errorf("Processing our own account connection event message: ignored") 1976 s.mu.Unlock() 1977 return 1978 } 1979 // If we are here we have interest in tracking this account. Update our accounting. 1980 clients := acc.updateRemoteServer(&m) 1981 s.updateRemoteServer(&m.Server) 1982 s.mu.Unlock() 1983 // Need to close clients outside of server lock 1984 for _, c := range clients { 1985 c.maxAccountConnExceeded() 1986 } 1987 } 1988 1989 // This will import any system level exports. 1990 func (s *Server) registerSystemImports(a *Account) { 1991 if a == nil || !s.EventsEnabled() { 1992 return 1993 } 1994 sacc := s.SystemAccount() 1995 if sacc == nil || sacc == a { 1996 return 1997 } 1998 // FIXME(dlc) - make a shared list between sys exports etc. 
1999 2000 importSrvc := func(subj, mappedSubj string) { 2001 if !a.serviceImportExists(subj) { 2002 if err := a.addServiceImportWithClaim(sacc, subj, mappedSubj, nil, true); err != nil { 2003 s.Errorf("Error setting up system service import %s -> %s for account: %v", 2004 subj, mappedSubj, err) 2005 } 2006 } 2007 } 2008 // Add in this to the account in 2 places. 2009 // "$SYS.REQ.SERVER.PING.CONNZ" and "$SYS.REQ.ACCOUNT.PING.CONNZ" 2010 mappedConnzSubj := fmt.Sprintf(accDirectReqSubj, a.Name, "CONNZ") 2011 importSrvc(fmt.Sprintf(accPingReqSubj, "CONNZ"), mappedConnzSubj) 2012 importSrvc(fmt.Sprintf(serverPingReqSubj, "CONNZ"), mappedConnzSubj) 2013 importSrvc(fmt.Sprintf(accPingReqSubj, "STATZ"), fmt.Sprintf(accDirectReqSubj, a.Name, "STATZ")) 2014 2015 // This is for user's looking up their own info. 2016 mappedSubject := fmt.Sprintf(userDirectReqSubj, a.Name) 2017 importSrvc(userDirectInfoSubj, mappedSubject) 2018 // Make sure to share details. 2019 a.setServiceImportSharing(sacc, mappedSubject, false, true) 2020 } 2021 2022 // Setup tracking for this account. This allows us to track global account activity. 2023 // Lock should be held on entry. 2024 func (s *Server) enableAccountTracking(a *Account) { 2025 if a == nil || !s.eventsEnabled() { 2026 return 2027 } 2028 2029 // TODO(ik): Generate payload although message may not be sent. 2030 // May need to ensure we do so only if there is a known interest. 2031 // This can get complicated with gateways. 2032 2033 subj := fmt.Sprintf(accDirectReqSubj, a.Name, "CONNS") 2034 reply := fmt.Sprintf(connsRespSubj, s.info.ID) 2035 m := accNumConnsReq{Account: a.Name} 2036 s.sendInternalMsg(subj, reply, &m.Server, &m) 2037 } 2038 2039 // Event on leaf node connect. 2040 // Lock should NOT be held on entry. 2041 func (s *Server) sendLeafNodeConnect(a *Account) { 2042 s.mu.Lock() 2043 // If we are not in operator mode, or do not have any gateways defined, this should also be a no-op. 
2044 if a == nil || !s.eventsEnabled() || !s.gateway.enabled { 2045 s.mu.Unlock() 2046 return 2047 } 2048 s.sendLeafNodeConnectMsg(a.Name) 2049 s.mu.Unlock() 2050 2051 s.switchAccountToInterestMode(a.Name) 2052 } 2053 2054 // Send the leafnode connect message. 2055 // Lock should be held. 2056 func (s *Server) sendLeafNodeConnectMsg(accName string) { 2057 subj := fmt.Sprintf(leafNodeConnectEventSubj, accName) 2058 m := accNumConnsReq{Account: accName} 2059 s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) 2060 } 2061 2062 // sendAccConnsUpdate is called to send out our information on the 2063 // account's local connections. 2064 // Lock should be held on entry. 2065 func (s *Server) sendAccConnsUpdate(a *Account, subj ...string) { 2066 if !s.eventsEnabled() || a == nil { 2067 return 2068 } 2069 sendQ := s.sys.sendq 2070 if sendQ == nil { 2071 return 2072 } 2073 // Build event with account name and number of local clients and leafnodes. 2074 eid := s.nextEventID() 2075 a.mu.Lock() 2076 stat := a.statz() 2077 m := AccountNumConns{ 2078 TypedEvent: TypedEvent{ 2079 Type: AccountNumConnsMsgType, 2080 ID: eid, 2081 Time: time.Now().UTC(), 2082 }, 2083 AccountStat: *stat, 2084 } 2085 // Set timer to fire again unless we are at zero. 2086 if m.TotalConns == 0 { 2087 clearTimer(&a.ctmr) 2088 } else { 2089 // Check to see if we have an HB running and update. 2090 if a.ctmr == nil { 2091 a.ctmr = time.AfterFunc(eventsHBInterval, func() { s.accConnsUpdate(a) }) 2092 } else { 2093 a.ctmr.Reset(eventsHBInterval) 2094 } 2095 } 2096 for _, sub := range subj { 2097 msg := newPubMsg(nil, sub, _EMPTY_, &m.Server, nil, &m, noCompression, false, false) 2098 sendQ.push(msg) 2099 } 2100 a.mu.Unlock() 2101 } 2102 2103 // Lock should be held on entry. 
2104 func (a *Account) statz() *AccountStat { 2105 localConns := a.numLocalConnections() 2106 leafConns := a.numLocalLeafNodes() 2107 return &AccountStat{ 2108 Account: a.Name, 2109 Name: a.LogicalName, 2110 Conns: localConns, 2111 LeafNodes: leafConns, 2112 TotalConns: localConns + leafConns, 2113 NumSubs: a.sl.Count(), 2114 Received: DataStats{ 2115 Msgs: atomic.LoadInt64(&a.inMsgs), 2116 Bytes: atomic.LoadInt64(&a.inBytes), 2117 }, 2118 Sent: DataStats{ 2119 Msgs: atomic.LoadInt64(&a.outMsgs), 2120 Bytes: atomic.LoadInt64(&a.outBytes), 2121 }, 2122 SlowConsumers: atomic.LoadInt64(&a.slowConsumers), 2123 } 2124 } 2125 2126 // accConnsUpdate is called whenever there is a change to the account's 2127 // number of active connections, or during a heartbeat. 2128 // We will not send for $G. 2129 func (s *Server) accConnsUpdate(a *Account) { 2130 s.mu.Lock() 2131 defer s.mu.Unlock() 2132 if !s.eventsEnabled() || a == nil || a == s.gacc { 2133 return 2134 } 2135 s.sendAccConnsUpdate(a, fmt.Sprintf(accConnsEventSubjOld, a.Name), fmt.Sprintf(accConnsEventSubjNew, a.Name)) 2136 } 2137 2138 // server lock should be held 2139 func (s *Server) nextEventID() string { 2140 return s.eventIds.Next() 2141 } 2142 2143 // accountConnectEvent will send an account client connect event if there is interest. 2144 // This is a billing event. 
2145 func (s *Server) accountConnectEvent(c *client) { 2146 s.mu.Lock() 2147 if !s.eventsEnabled() { 2148 s.mu.Unlock() 2149 return 2150 } 2151 gacc := s.gacc 2152 eid := s.nextEventID() 2153 s.mu.Unlock() 2154 2155 c.mu.Lock() 2156 // Ignore global account activity 2157 if c.acc == nil || c.acc == gacc { 2158 c.mu.Unlock() 2159 return 2160 } 2161 2162 m := ConnectEventMsg{ 2163 TypedEvent: TypedEvent{ 2164 Type: ConnectEventMsgType, 2165 ID: eid, 2166 Time: time.Now().UTC(), 2167 }, 2168 Client: ClientInfo{ 2169 Start: &c.start, 2170 Host: c.host, 2171 ID: c.cid, 2172 Account: accForClient(c), 2173 User: c.getRawAuthUser(), 2174 Name: c.opts.Name, 2175 Lang: c.opts.Lang, 2176 Version: c.opts.Version, 2177 Jwt: c.opts.JWT, 2178 IssuerKey: issuerForClient(c), 2179 Tags: c.tags, 2180 NameTag: c.nameTag, 2181 Kind: c.kindString(), 2182 ClientType: c.clientTypeString(), 2183 MQTTClient: c.getMQTTClientID(), 2184 }, 2185 } 2186 subj := fmt.Sprintf(connectEventSubj, c.acc.Name) 2187 c.mu.Unlock() 2188 2189 s.sendInternalMsgLocked(subj, _EMPTY_, &m.Server, &m) 2190 } 2191 2192 // accountDisconnectEvent will send an account client disconnect event if there is interest. 2193 // This is a billing event. 
2194 func (s *Server) accountDisconnectEvent(c *client, now time.Time, reason string) { 2195 s.mu.Lock() 2196 if !s.eventsEnabled() { 2197 s.mu.Unlock() 2198 return 2199 } 2200 gacc := s.gacc 2201 eid := s.nextEventID() 2202 s.mu.Unlock() 2203 2204 c.mu.Lock() 2205 2206 // Ignore global account activity 2207 if c.acc == nil || c.acc == gacc { 2208 c.mu.Unlock() 2209 return 2210 } 2211 2212 m := DisconnectEventMsg{ 2213 TypedEvent: TypedEvent{ 2214 Type: DisconnectEventMsgType, 2215 ID: eid, 2216 Time: now, 2217 }, 2218 Client: ClientInfo{ 2219 Start: &c.start, 2220 Stop: &now, 2221 Host: c.host, 2222 ID: c.cid, 2223 Account: accForClient(c), 2224 User: c.getRawAuthUser(), 2225 Name: c.opts.Name, 2226 Lang: c.opts.Lang, 2227 Version: c.opts.Version, 2228 RTT: c.getRTT(), 2229 Jwt: c.opts.JWT, 2230 IssuerKey: issuerForClient(c), 2231 Tags: c.tags, 2232 NameTag: c.nameTag, 2233 Kind: c.kindString(), 2234 ClientType: c.clientTypeString(), 2235 MQTTClient: c.getMQTTClientID(), 2236 }, 2237 Sent: DataStats{ 2238 Msgs: atomic.LoadInt64(&c.inMsgs), 2239 Bytes: atomic.LoadInt64(&c.inBytes), 2240 }, 2241 Received: DataStats{ 2242 Msgs: c.outMsgs, 2243 Bytes: c.outBytes, 2244 }, 2245 Reason: reason, 2246 } 2247 accName := c.acc.Name 2248 c.mu.Unlock() 2249 2250 subj := fmt.Sprintf(disconnectEventSubj, accName) 2251 s.sendInternalMsgLocked(subj, _EMPTY_, &m.Server, &m) 2252 } 2253 2254 // This is the system level event sent to the system account for operators. 
2255 func (s *Server) sendAuthErrorEvent(c *client) { 2256 s.mu.Lock() 2257 if !s.eventsEnabled() { 2258 s.mu.Unlock() 2259 return 2260 } 2261 eid := s.nextEventID() 2262 s.mu.Unlock() 2263 2264 now := time.Now().UTC() 2265 c.mu.Lock() 2266 m := DisconnectEventMsg{ 2267 TypedEvent: TypedEvent{ 2268 Type: DisconnectEventMsgType, 2269 ID: eid, 2270 Time: now, 2271 }, 2272 Client: ClientInfo{ 2273 Start: &c.start, 2274 Stop: &now, 2275 Host: c.host, 2276 ID: c.cid, 2277 Account: accForClient(c), 2278 User: c.getRawAuthUser(), 2279 Name: c.opts.Name, 2280 Lang: c.opts.Lang, 2281 Version: c.opts.Version, 2282 RTT: c.getRTT(), 2283 Jwt: c.opts.JWT, 2284 IssuerKey: issuerForClient(c), 2285 Tags: c.tags, 2286 NameTag: c.nameTag, 2287 Kind: c.kindString(), 2288 ClientType: c.clientTypeString(), 2289 MQTTClient: c.getMQTTClientID(), 2290 }, 2291 Sent: DataStats{ 2292 Msgs: c.inMsgs, 2293 Bytes: c.inBytes, 2294 }, 2295 Received: DataStats{ 2296 Msgs: c.outMsgs, 2297 Bytes: c.outBytes, 2298 }, 2299 Reason: AuthenticationViolation.String(), 2300 } 2301 c.mu.Unlock() 2302 2303 s.mu.Lock() 2304 subj := fmt.Sprintf(authErrorEventSubj, s.info.ID) 2305 s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) 2306 s.mu.Unlock() 2307 } 2308 2309 // This is the account level event sent to the origin account for account owners. 
2310 func (s *Server) sendAccountAuthErrorEvent(c *client, acc *Account, reason string) { 2311 if acc == nil { 2312 return 2313 } 2314 s.mu.Lock() 2315 if !s.eventsEnabled() { 2316 s.mu.Unlock() 2317 return 2318 } 2319 eid := s.nextEventID() 2320 s.mu.Unlock() 2321 2322 now := time.Now().UTC() 2323 c.mu.Lock() 2324 m := DisconnectEventMsg{ 2325 TypedEvent: TypedEvent{ 2326 Type: DisconnectEventMsgType, 2327 ID: eid, 2328 Time: now, 2329 }, 2330 Client: ClientInfo{ 2331 Start: &c.start, 2332 Stop: &now, 2333 Host: c.host, 2334 ID: c.cid, 2335 Account: acc.Name, 2336 User: c.getRawAuthUser(), 2337 Name: c.opts.Name, 2338 Lang: c.opts.Lang, 2339 Version: c.opts.Version, 2340 RTT: c.getRTT(), 2341 Jwt: c.opts.JWT, 2342 IssuerKey: issuerForClient(c), 2343 Tags: c.tags, 2344 NameTag: c.nameTag, 2345 Kind: c.kindString(), 2346 ClientType: c.clientTypeString(), 2347 MQTTClient: c.getMQTTClientID(), 2348 }, 2349 Sent: DataStats{ 2350 Msgs: c.inMsgs, 2351 Bytes: c.inBytes, 2352 }, 2353 Received: DataStats{ 2354 Msgs: c.outMsgs, 2355 Bytes: c.outBytes, 2356 }, 2357 Reason: reason, 2358 } 2359 c.mu.Unlock() 2360 2361 s.sendInternalAccountSysMsg(acc, authErrorAccountEventSubj, &m.Server, &m, noCompression) 2362 } 2363 2364 // Internal message callback. 2365 // If the msg is needed past the callback it is required to be copied. 2366 // rmsg contains header and the message. use client.msgParts(rmsg) to split them apart 2367 type msgHandler func(sub *subscription, client *client, acc *Account, subject, reply string, rmsg []byte) 2368 2369 // Create a wrapped callback handler for the subscription that will move it to an 2370 // internal recvQ for processing not inline with routes etc. 2371 func (s *Server) noInlineCallback(cb sysMsgHandler) msgHandler { 2372 s.mu.RLock() 2373 if !s.eventsEnabled() { 2374 s.mu.RUnlock() 2375 return nil 2376 } 2377 // Capture here for direct reference to avoid any unnecessary blocking inline with routes, gateways etc. 
2378 recvq := s.sys.recvq 2379 s.mu.RUnlock() 2380 2381 return func(sub *subscription, c *client, acc *Account, subj, rply string, rmsg []byte) { 2382 // Need to copy and split here. 2383 hdr, msg := c.msgParts(rmsg) 2384 recvq.push(&inSysMsg{sub, c, acc, subj, rply, copyBytes(hdr), copyBytes(msg), cb}) 2385 } 2386 } 2387 2388 // Create an internal subscription. sysSubscribeQ for queue groups. 2389 func (s *Server) sysSubscribe(subject string, cb msgHandler) (*subscription, error) { 2390 return s.systemSubscribe(subject, _EMPTY_, false, nil, cb) 2391 } 2392 2393 // Create an internal subscription with queue 2394 func (s *Server) sysSubscribeQ(subject, queue string, cb msgHandler) (*subscription, error) { 2395 return s.systemSubscribe(subject, queue, false, nil, cb) 2396 } 2397 2398 // Create an internal subscription but do not forward interest. 2399 func (s *Server) sysSubscribeInternal(subject string, cb msgHandler) (*subscription, error) { 2400 return s.systemSubscribe(subject, _EMPTY_, true, nil, cb) 2401 } 2402 2403 func (s *Server) systemSubscribe(subject, queue string, internalOnly bool, c *client, cb msgHandler) (*subscription, error) { 2404 s.mu.Lock() 2405 if !s.eventsEnabled() { 2406 s.mu.Unlock() 2407 return nil, ErrNoSysAccount 2408 } 2409 if cb == nil { 2410 s.mu.Unlock() 2411 return nil, fmt.Errorf("undefined message handler") 2412 } 2413 if c == nil { 2414 c = s.sys.client 2415 } 2416 trace := c.trace 2417 s.sys.sid++ 2418 sid := strconv.Itoa(s.sys.sid) 2419 s.mu.Unlock() 2420 2421 // Now create the subscription 2422 if trace { 2423 c.traceInOp("SUB", []byte(subject+" "+queue+" "+sid)) 2424 } 2425 2426 var q []byte 2427 if queue != _EMPTY_ { 2428 q = []byte(queue) 2429 } 2430 2431 // Now create the subscription 2432 return c.processSub([]byte(subject), q, []byte(sid), cb, internalOnly) 2433 } 2434 2435 func (s *Server) sysUnsubscribe(sub *subscription) { 2436 if sub == nil { 2437 return 2438 } 2439 s.mu.RLock() 2440 if !s.eventsEnabled() { 2441 
s.mu.RUnlock() 2442 return 2443 } 2444 c := sub.client 2445 s.mu.RUnlock() 2446 2447 if c != nil { 2448 c.processUnsub(sub.sid) 2449 } 2450 } 2451 2452 // This will generate the tracking subject for remote latency from the response subject. 2453 func remoteLatencySubjectForResponse(subject []byte) string { 2454 if !isTrackedReply(subject) { 2455 return "" 2456 } 2457 toks := bytes.Split(subject, []byte(tsep)) 2458 // FIXME(dlc) - Sprintf may become a performance concern at some point. 2459 return fmt.Sprintf(remoteLatencyEventSubj, toks[len(toks)-2]) 2460 } 2461 2462 // remoteLatencyUpdate is used to track remote latency measurements for tracking on exported services. 2463 func (s *Server) remoteLatencyUpdate(sub *subscription, _ *client, _ *Account, subject, _ string, hdr, msg []byte) { 2464 if !s.eventsRunning() { 2465 return 2466 } 2467 var rl remoteLatency 2468 if err := json.Unmarshal(msg, &rl); err != nil { 2469 s.Errorf("Error unmarshalling remote latency measurement: %v", err) 2470 return 2471 } 2472 // Now we need to look up the responseServiceImport associated with this measurement. 2473 acc, err := s.LookupAccount(rl.Account) 2474 if err != nil { 2475 s.Warnf("Could not lookup account %q for latency measurement", rl.Account) 2476 return 2477 } 2478 // Now get the request id / reply. We need to see if we have a GW prefix and if so strip that off. 2479 reply := rl.ReqId 2480 if gwPrefix, old := isGWRoutedSubjectAndIsOldPrefix([]byte(reply)); gwPrefix { 2481 reply = string(getSubjectFromGWRoutedReply([]byte(reply), old)) 2482 } 2483 acc.mu.RLock() 2484 si := acc.exports.responses[reply] 2485 if si == nil { 2486 acc.mu.RUnlock() 2487 return 2488 } 2489 lsub := si.latency.subject 2490 acc.mu.RUnlock() 2491 2492 si.acc.mu.Lock() 2493 m1 := si.m1 2494 m2 := rl.M2 2495 2496 // So we have not processed the response tracking measurement yet. 2497 if m1 == nil { 2498 // Store our value there for them to pick up. 
2499 si.m1 = &m2 2500 } 2501 si.acc.mu.Unlock() 2502 2503 if m1 == nil { 2504 return 2505 } 2506 2507 // Calculate the correct latencies given M1 and M2. 2508 m1.merge(&m2) 2509 2510 // Clear the requesting client since we send the result here. 2511 acc.mu.Lock() 2512 si.rc = nil 2513 acc.mu.Unlock() 2514 2515 // Make sure we remove the entry here. 2516 acc.removeServiceImport(si.from) 2517 // Send the metrics 2518 s.sendInternalAccountMsg(acc, lsub, m1) 2519 } 2520 2521 // This is used for all inbox replies so that we do not send supercluster wide interest 2522 // updates for every request. Same trick used in modern NATS clients. 2523 func (s *Server) inboxReply(sub *subscription, c *client, acc *Account, subject, reply string, msg []byte) { 2524 s.mu.RLock() 2525 if !s.eventsEnabled() || s.sys.replies == nil { 2526 s.mu.RUnlock() 2527 return 2528 } 2529 cb, ok := s.sys.replies[subject] 2530 s.mu.RUnlock() 2531 2532 if ok && cb != nil { 2533 cb(sub, c, acc, subject, reply, msg) 2534 } 2535 } 2536 2537 // Copied from go client. 2538 // We could use serviceReply here instead to save some code. 2539 // I prefer these semantics for the moment, when tracing you know what this is. 2540 const ( 2541 InboxPrefix = "$SYS._INBOX." 2542 inboxPrefixLen = len(InboxPrefix) 2543 respInboxPrefixLen = inboxPrefixLen + sysHashLen + 1 2544 replySuffixLen = 8 // Gives us 62^8 2545 ) 2546 2547 // Creates an internal inbox used for replies that will be processed by the global wc handler. 2548 func (s *Server) newRespInbox() string { 2549 var b [respInboxPrefixLen + replySuffixLen]byte 2550 pres := b[:respInboxPrefixLen] 2551 copy(pres, s.sys.inboxPre) 2552 rn := rand.Int63() 2553 for i, l := respInboxPrefixLen, rn; i < len(b); i++ { 2554 b[i] = digits[l%base] 2555 l /= base 2556 } 2557 return string(b[:]) 2558 } 2559 2560 // accNumSubsReq is sent when we need to gather remote info on subs. 
2561 type accNumSubsReq struct { 2562 Account string `json:"acc"` 2563 Subject string `json:"subject"` 2564 Queue []byte `json:"queue,omitempty"` 2565 } 2566 2567 // helper function to total information from results to count subs. 2568 func totalSubs(rr *SublistResult, qg []byte) (nsubs int32) { 2569 if rr == nil { 2570 return 2571 } 2572 checkSub := func(sub *subscription) { 2573 // TODO(dlc) - This could be smarter. 2574 if qg != nil && !bytes.Equal(qg, sub.queue) { 2575 return 2576 } 2577 if sub.client.kind == CLIENT || sub.client.isHubLeafNode() { 2578 nsubs++ 2579 } 2580 } 2581 if qg == nil { 2582 for _, sub := range rr.psubs { 2583 checkSub(sub) 2584 } 2585 } 2586 for _, qsub := range rr.qsubs { 2587 for _, sub := range qsub { 2588 checkSub(sub) 2589 } 2590 } 2591 return 2592 } 2593 2594 // Allows users of large systems to debug active subscribers for a given subject. 2595 // Payload should be the subject of interest. 2596 func (s *Server) debugSubscribers(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 2597 // Even though this is an internal only subscription, meaning interest was not forwarded, we could 2598 // get one here from a GW in optimistic mode. Ignore for now. 2599 // FIXME(dlc) - Should we send no interest here back to the GW? 2600 if c.kind != CLIENT { 2601 return 2602 } 2603 2604 var ci ClientInfo 2605 if len(hdr) > 0 { 2606 if err := json.Unmarshal(getHeader(ClientInfoHdr, hdr), &ci); err != nil { 2607 return 2608 } 2609 } 2610 2611 var acc *Account 2612 if ci.Service != _EMPTY_ { 2613 acc, _ = s.LookupAccount(ci.Service) 2614 } else if ci.Account != _EMPTY_ { 2615 acc, _ = s.LookupAccount(ci.Account) 2616 } else { 2617 // Direct $SYS access. 2618 acc = c.acc 2619 if acc == nil { 2620 acc = s.SystemAccount() 2621 } 2622 } 2623 if acc == nil { 2624 return 2625 } 2626 2627 // We could have a single subject or we could have a subject and a wildcard separated by whitespace. 
2628 args := strings.Split(strings.TrimSpace(string(msg)), " ") 2629 if len(args) == 0 { 2630 s.sendInternalAccountMsg(acc, reply, 0) 2631 return 2632 } 2633 2634 tsubj := args[0] 2635 var qgroup []byte 2636 if len(args) > 1 { 2637 qgroup = []byte(args[1]) 2638 } 2639 2640 var nsubs int32 2641 2642 if subjectIsLiteral(tsubj) { 2643 // We will look up subscribers locally first then determine if we need to solicit other servers. 2644 rr := acc.sl.Match(tsubj) 2645 nsubs = totalSubs(rr, qgroup) 2646 } else { 2647 // We have a wildcard, so this is a bit slower path. 2648 var _subs [32]*subscription 2649 subs := _subs[:0] 2650 acc.sl.All(&subs) 2651 for _, sub := range subs { 2652 if subjectIsSubsetMatch(string(sub.subject), tsubj) { 2653 if qgroup != nil && !bytes.Equal(qgroup, sub.queue) { 2654 continue 2655 } 2656 if sub.client.kind == CLIENT || sub.client.isHubLeafNode() { 2657 nsubs++ 2658 } 2659 } 2660 } 2661 } 2662 2663 // We should have an idea of how many responses to expect from remote servers. 2664 var expected = acc.expectedRemoteResponses() 2665 2666 // If we are only local, go ahead and return. 2667 if expected == 0 { 2668 s.sendInternalAccountMsg(nil, reply, nsubs) 2669 return 2670 } 2671 2672 // We need to solicit from others. 2673 // To track status. 2674 responses := int32(0) 2675 done := make(chan (bool)) 2676 2677 s.mu.Lock() 2678 // Create direct reply inbox that we multiplex under the WC replies. 2679 replySubj := s.newRespInbox() 2680 // Store our handler. 2681 s.sys.replies[replySubj] = func(sub *subscription, _ *client, _ *Account, subject, _ string, msg []byte) { 2682 if n, err := strconv.Atoi(string(msg)); err == nil { 2683 atomic.AddInt32(&nsubs, int32(n)) 2684 } 2685 if atomic.AddInt32(&responses, 1) >= expected { 2686 select { 2687 case done <- true: 2688 default: 2689 } 2690 } 2691 } 2692 // Send the request to the other servers. 
2693 request := &accNumSubsReq{ 2694 Account: acc.Name, 2695 Subject: tsubj, 2696 Queue: qgroup, 2697 } 2698 s.sendInternalMsg(accNumSubsReqSubj, replySubj, nil, request) 2699 s.mu.Unlock() 2700 2701 // FIXME(dlc) - We should rate limit here instead of blind Go routine. 2702 go func() { 2703 select { 2704 case <-done: 2705 case <-time.After(500 * time.Millisecond): 2706 } 2707 // Cleanup the WC entry. 2708 var sendResponse bool 2709 s.mu.Lock() 2710 if s.sys != nil && s.sys.replies != nil { 2711 delete(s.sys.replies, replySubj) 2712 sendResponse = true 2713 } 2714 s.mu.Unlock() 2715 if sendResponse { 2716 // Send the response. 2717 s.sendInternalAccountMsg(nil, reply, atomic.LoadInt32(&nsubs)) 2718 } 2719 }() 2720 } 2721 2722 // Request for our local subscription count. This will come from a remote origin server 2723 // that received the initial request. 2724 func (s *Server) nsubsRequest(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 2725 if !s.eventsRunning() { 2726 return 2727 } 2728 m := accNumSubsReq{} 2729 if len(msg) == 0 { 2730 s.sys.client.Errorf("request requires a body") 2731 return 2732 } else if err := json.Unmarshal(msg, &m); err != nil { 2733 s.sys.client.Errorf("Error unmarshalling account nsubs request message: %v", err) 2734 return 2735 } 2736 // Grab account. 2737 acc, _ := s.lookupAccount(m.Account) 2738 if acc == nil || acc.numLocalAndLeafConnections() == 0 { 2739 return 2740 } 2741 // We will look up subscribers locally first then determine if we need to solicit other servers. 2742 var nsubs int32 2743 if subjectIsLiteral(m.Subject) { 2744 rr := acc.sl.Match(m.Subject) 2745 nsubs = totalSubs(rr, m.Queue) 2746 } else { 2747 // We have a wildcard, so this is a bit slower path. 
2748 var _subs [32]*subscription 2749 subs := _subs[:0] 2750 acc.sl.All(&subs) 2751 for _, sub := range subs { 2752 if (sub.client.kind == CLIENT || sub.client.isHubLeafNode()) && subjectIsSubsetMatch(string(sub.subject), m.Subject) { 2753 if m.Queue != nil && !bytes.Equal(m.Queue, sub.queue) { 2754 continue 2755 } 2756 nsubs++ 2757 } 2758 } 2759 } 2760 s.sendInternalMsgLocked(reply, _EMPTY_, nil, nsubs) 2761 } 2762 2763 func (s *Server) reloadConfig(sub *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 2764 if !s.eventsRunning() { 2765 return 2766 } 2767 2768 optz := &EventFilterOptions{} 2769 s.zReq(c, reply, hdr, msg, optz, optz, func() (any, error) { 2770 // Reload the server config, as requested. 2771 return nil, s.Reload() 2772 }) 2773 } 2774 2775 type KickClientReq struct { 2776 CID uint64 `json:"cid"` 2777 } 2778 2779 type LDMClientReq struct { 2780 CID uint64 `json:"cid"` 2781 } 2782 2783 func (s *Server) kickClient(_ *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 2784 if !s.eventsRunning() { 2785 return 2786 } 2787 2788 var req KickClientReq 2789 if err := json.Unmarshal(msg, &req); err != nil { 2790 s.sys.client.Errorf("Error unmarshalling kick client request: %v", err) 2791 return 2792 } 2793 2794 optz := &EventFilterOptions{} 2795 s.zReq(c, reply, hdr, msg, optz, optz, func() (any, error) { 2796 return nil, s.DisconnectClientByID(req.CID) 2797 }) 2798 2799 } 2800 2801 func (s *Server) ldmClient(_ *subscription, c *client, _ *Account, subject, reply string, hdr, msg []byte) { 2802 if !s.eventsRunning() { 2803 return 2804 } 2805 2806 var req LDMClientReq 2807 if err := json.Unmarshal(msg, &req); err != nil { 2808 s.sys.client.Errorf("Error unmarshalling kick client request: %v", err) 2809 return 2810 } 2811 2812 optz := &EventFilterOptions{} 2813 s.zReq(c, reply, hdr, msg, optz, optz, func() (any, error) { 2814 return nil, s.LDMClientByID(req.CID) 2815 }) 2816 } 2817 2818 // Helper to grab 
account name for a client. 2819 func accForClient(c *client) string { 2820 if c.acc != nil { 2821 return c.acc.Name 2822 } 2823 return "N/A" 2824 } 2825 2826 // Helper to grab issuer for a client. 2827 func issuerForClient(c *client) (issuerKey string) { 2828 if c == nil || c.user == nil { 2829 return 2830 } 2831 issuerKey = c.user.SigningKey 2832 if issuerKey == _EMPTY_ && c.user.Account != nil { 2833 issuerKey = c.user.Account.Name 2834 } 2835 return 2836 } 2837 2838 // Helper to clear timers. 2839 func clearTimer(tp **time.Timer) { 2840 if t := *tp; t != nil { 2841 t.Stop() 2842 *tp = nil 2843 } 2844 } 2845 2846 // Helper function to wrap functions with common test 2847 // to lock server and return if events not enabled. 2848 func (s *Server) wrapChk(f func()) func() { 2849 return func() { 2850 s.mu.Lock() 2851 if !s.eventsEnabled() { 2852 s.mu.Unlock() 2853 return 2854 } 2855 f() 2856 s.mu.Unlock() 2857 } 2858 } 2859 2860 // sendOCSPPeerRejectEvent sends a system level event to system account when a peer connection is 2861 // rejected due to OCSP invalid status of its trust chain(s). 
2862 func (s *Server) sendOCSPPeerRejectEvent(kind string, peer *x509.Certificate, reason string) { 2863 s.mu.Lock() 2864 defer s.mu.Unlock() 2865 if !s.eventsEnabled() { 2866 return 2867 } 2868 if peer == nil { 2869 s.Errorf(certidp.ErrPeerEmptyNoEvent) 2870 return 2871 } 2872 eid := s.nextEventID() 2873 now := time.Now().UTC() 2874 m := OCSPPeerRejectEventMsg{ 2875 TypedEvent: TypedEvent{ 2876 Type: OCSPPeerRejectEventMsgType, 2877 ID: eid, 2878 Time: now, 2879 }, 2880 Kind: kind, 2881 Peer: certidp.CertInfo{ 2882 Subject: certidp.GetSubjectDNForm(peer), 2883 Issuer: certidp.GetIssuerDNForm(peer), 2884 Fingerprint: certidp.GenerateFingerprint(peer), 2885 Raw: peer.Raw, 2886 }, 2887 Reason: reason, 2888 } 2889 subj := fmt.Sprintf(ocspPeerRejectEventSubj, s.info.ID) 2890 s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) 2891 } 2892 2893 // sendOCSPPeerChainlinkInvalidEvent sends a system level event to system account when a link in a peer's trust chain 2894 // is OCSP invalid. 2895 func (s *Server) sendOCSPPeerChainlinkInvalidEvent(peer *x509.Certificate, link *x509.Certificate, reason string) { 2896 s.mu.Lock() 2897 defer s.mu.Unlock() 2898 if !s.eventsEnabled() { 2899 return 2900 } 2901 if peer == nil || link == nil { 2902 s.Errorf(certidp.ErrPeerEmptyNoEvent) 2903 return 2904 } 2905 eid := s.nextEventID() 2906 now := time.Now().UTC() 2907 m := OCSPPeerChainlinkInvalidEventMsg{ 2908 TypedEvent: TypedEvent{ 2909 Type: OCSPPeerChainlinkInvalidEventMsgType, 2910 ID: eid, 2911 Time: now, 2912 }, 2913 Link: certidp.CertInfo{ 2914 Subject: certidp.GetSubjectDNForm(link), 2915 Issuer: certidp.GetIssuerDNForm(link), 2916 Fingerprint: certidp.GenerateFingerprint(link), 2917 Raw: link.Raw, 2918 }, 2919 Peer: certidp.CertInfo{ 2920 Subject: certidp.GetSubjectDNForm(peer), 2921 Issuer: certidp.GetIssuerDNForm(peer), 2922 Fingerprint: certidp.GenerateFingerprint(peer), 2923 Raw: peer.Raw, 2924 }, 2925 Reason: reason, 2926 } 2927 subj := 
fmt.Sprintf(ocspPeerChainlinkInvalidEventSubj, s.info.ID) 2928 s.sendInternalMsg(subj, _EMPTY_, &m.Server, &m) 2929 }