github.com/nats-io/nats-server/v2@v2.11.0-preview.2/server/stream.go

// Copyright 2019-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
	"archive/tar"
	"bytes"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math"
	"math/rand"
	"os"
	"path/filepath"
	"reflect"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/klauspost/compress/s2"
	"github.com/nats-io/nuid"
)

// StreamConfig will determine the name, subjects and retention policy
// for a given stream. If subjects is empty the name will be used.
type StreamConfig struct {
	Name         string           `json:"name"`
	Description  string           `json:"description,omitempty"`
	Subjects     []string         `json:"subjects,omitempty"`
	Retention    RetentionPolicy  `json:"retention"`
	MaxConsumers int              `json:"max_consumers"`
	MaxMsgs      int64            `json:"max_msgs"`
	MaxBytes     int64            `json:"max_bytes"`
	MaxAge       time.Duration    `json:"max_age"`
	MaxMsgsPer   int64            `json:"max_msgs_per_subject"`
	MaxMsgSize   int32            `json:"max_msg_size,omitempty"`
	Discard      DiscardPolicy    `json:"discard"`
	Storage      StorageType      `json:"storage"`
	Replicas     int              `json:"num_replicas"`
	NoAck        bool             `json:"no_ack,omitempty"`
	Template     string           `json:"template_owner,omitempty"`
	Duplicates   time.Duration    `json:"duplicate_window,omitempty"`
	Placement    *Placement       `json:"placement,omitempty"`
	Mirror       *StreamSource    `json:"mirror,omitempty"`
	Sources      []*StreamSource  `json:"sources,omitempty"`
	Compression  StoreCompression `json:"compression"`
	FirstSeq     uint64           `json:"first_seq,omitempty"`

	// Allow applying a subject transform to incoming messages before doing anything else.
	SubjectTransform *SubjectTransformConfig `json:"subject_transform,omitempty"`

	// Allow republish of the message after being sequenced and stored.
	RePublish *RePublish `json:"republish,omitempty"`

	// Allow higher performance, direct access to get individual messages. E.g. KeyValue.
	AllowDirect bool `json:"allow_direct"`
	// Allow higher performance and unified direct access for mirrors as well.
	MirrorDirect bool `json:"mirror_direct"`

	// Allow KV like semantics to also discard new on a per subject basis.
	DiscardNewPer bool `json:"discard_new_per_subject,omitempty"`

	// Optional qualifiers. These can not be modified after set to true.

	// Sealed will seal a stream so no messages can get out or in.
	Sealed bool `json:"sealed"`
	// DenyDelete will restrict the ability to delete messages.
	DenyDelete bool `json:"deny_delete"`
	// DenyPurge will restrict the ability to purge messages.
	DenyPurge bool `json:"deny_purge"`
	// AllowRollup allows messages to be placed into the system and purge
	// all older messages using a special msg header.
	AllowRollup bool `json:"allow_rollup_hdrs"`

	// The following defaults will apply to consumers when created against
	// this stream, unless overridden manually.
	// TODO(nat): Can/should we name these better?
	ConsumerLimits StreamConsumerLimits `json:"consumer_limits"`

	// Metadata is additional metadata for the Stream.
	Metadata map[string]string `json:"metadata,omitempty"`
}
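
// Illustrative only (hypothetical values, not part of the server API surface):
// a minimal StreamConfig for a KV-style stream. Note that, per the validation
// in checkStreamCfg below, DiscardNewPer requires both Discard == DiscardNew
// and MaxMsgsPer > 0.
//
//	cfg := StreamConfig{
//		Name:          "KV_ORDERS",
//		Subjects:      []string{"$KV.ORDERS.>"},
//		Retention:     LimitsPolicy,
//		Storage:       FileStorage,
//		Replicas:      1,
//		MaxMsgsPer:    1,
//		Discard:       DiscardNew,
//		DiscardNewPer: true,
//		AllowDirect:   true,
//	}
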
type StreamConsumerLimits struct {
	InactiveThreshold time.Duration `json:"inactive_threshold,omitempty"`
	MaxAckPending     int           `json:"max_ack_pending,omitempty"`
}

// SubjectTransformConfig is for applying a subject transform (to matching
// messages) before doing anything else when a new message is received.
type SubjectTransformConfig struct {
	Source      string `json:"src"`
	Destination string `json:"dest"`
}

// RePublish is for republishing messages once committed to a stream.
type RePublish struct {
	Source      string `json:"src,omitempty"`
	Destination string `json:"dest"`
	HeadersOnly bool   `json:"headers_only,omitempty"`
}

// JSPubAckResponse is a formal response to a publish operation.
type JSPubAckResponse struct {
	Error *ApiError `json:"error,omitempty"`
	*PubAck
}

// ToError checks if the response has an error and if it does converts it to an error,
// avoiding the pitfalls described by https://yourbasic.org/golang/gotcha-why-nil-error-not-equal-nil/
func (r *JSPubAckResponse) ToError() error {
	if r.Error == nil {
		return nil
	}
	return r.Error
}
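
// A sketch of the nil-interface gotcha ToError guards against: returning
// r.Error directly through an error interface would compare non-nil even
// when the *ApiError pointer itself is nil.
//
//	var resp JSPubAckResponse
//	_ = json.Unmarshal(data, &resp) // hypothetical response payload
//	if err := resp.ToError(); err != nil {
//		// Only reached when the server actually set an error.
//	}
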
// PubAck is the detail you get back from a publish to a stream that was successful.
// e.g. +OK {"stream": "Orders", "seq": 22}
type PubAck struct {
	Stream    string `json:"stream"`
	Sequence  uint64 `json:"seq"`
	Domain    string `json:"domain,omitempty"`
	Duplicate bool   `json:"duplicate,omitempty"`
}

// StreamInfo shows config and current state for this stream.
type StreamInfo struct {
	Config     StreamConfig        `json:"config"`
	Created    time.Time           `json:"created"`
	State      StreamState         `json:"state"`
	Domain     string              `json:"domain,omitempty"`
	Cluster    *ClusterInfo        `json:"cluster,omitempty"`
	Mirror     *StreamSourceInfo   `json:"mirror,omitempty"`
	Sources    []*StreamSourceInfo `json:"sources,omitempty"`
	Alternates []StreamAlternate   `json:"alternates,omitempty"`
	// TimeStamp indicates when the info was gathered.
	TimeStamp time.Time `json:"ts"`
}

type StreamAlternate struct {
	Name    string `json:"name"`
	Domain  string `json:"domain,omitempty"`
	Cluster string `json:"cluster"`
}

// ClusterInfo shows information about the underlying set of servers
// that make up the stream or consumer.
type ClusterInfo struct {
	Name      string      `json:"name,omitempty"`
	RaftGroup string      `json:"raft_group,omitempty"`
	Leader    string      `json:"leader,omitempty"`
	Replicas  []*PeerInfo `json:"replicas,omitempty"`
}

// PeerInfo shows information about all the peers in the cluster that
// are supporting the stream or consumer.
type PeerInfo struct {
	Name    string        `json:"name"`
	Current bool          `json:"current"`
	Offline bool          `json:"offline,omitempty"`
	Active  time.Duration `json:"active"`
	Lag     uint64        `json:"lag,omitempty"`
	Peer    string        `json:"peer"`
	// For migrations.
	cluster string
}

// StreamSourceInfo shows information about an upstream stream source.
type StreamSourceInfo struct {
	Name              string                   `json:"name"`
	External          *ExternalStream          `json:"external,omitempty"`
	Lag               uint64                   `json:"lag"`
	Active            time.Duration            `json:"active"`
	Error             *ApiError                `json:"error,omitempty"`
	FilterSubject     string                   `json:"filter_subject,omitempty"`
	SubjectTransforms []SubjectTransformConfig `json:"subject_transforms,omitempty"`
}

// StreamSource dictates how streams can source from other streams.
type StreamSource struct {
	Name              string                   `json:"name"`
	OptStartSeq       uint64                   `json:"opt_start_seq,omitempty"`
	OptStartTime      *time.Time               `json:"opt_start_time,omitempty"`
	FilterSubject     string                   `json:"filter_subject,omitempty"`
	SubjectTransforms []SubjectTransformConfig `json:"subject_transforms,omitempty"`
	External          *ExternalStream          `json:"external,omitempty"`

	// Internal
	iname string // For indexing when stream names are the same for multiple sources.
}

// ExternalStream allows you to qualify access to a stream source in another account.
type ExternalStream struct {
	ApiPrefix     string `json:"api"`
	DeliverPrefix string `json:"deliver"`
}

// stream is a jetstream stream of messages. When we receive a message internally destined
// for a stream we will direct link from the client to this structure.
type stream struct {
	mu        sync.RWMutex
	js        *jetStream
	jsa       *jsAccount
	acc       *Account
	srv       *Server
	client    *client
	sysc      *client
	sid       atomic.Uint64
	pubAck    []byte
	outq      *jsOutQ
	msgs      *ipQueue[*inMsg]
	gets      *ipQueue[*directGetReq]
	store     StreamStore
	ackq      *ipQueue[uint64]
	lseq      uint64
	lmsgId    string
	consumers map[string]*consumer
	numFilter int // number of filtered consumers
	cfg       StreamConfig
	created   time.Time
	stype     StorageType
	tier      string
	ddmap     map[string]*ddentry
	ddarr     []*ddentry
	ddindex   int
	ddtmr     *time.Timer
	qch       chan struct{}
	mqch      chan struct{}
	active    bool
	ddloaded  bool
	closed    atomic.Bool

	// Mirror
	mirror *sourceInfo

	// Sources
	sources              map[string]*sourceInfo
	sourceRetries        map[string]*time.Timer
	sourcesConsumerSetup *time.Timer

	// Indicates we have direct consumers.
	directs int

	// For input subject transform.
	itr *subjectTransform

	// For republishing.
	tr *subjectTransform

	// For processing consumers without main stream lock.
	clsMu sync.RWMutex
	cList []*consumer
	sch   chan struct{}
	sigq  *ipQueue[*cMsg]
	csl   *Sublist // Consumer Sublist

	// Leader will store seq/msgTrace in clustering mode. Used in applyStreamEntries
	// to know if trace event should be sent after processing.
	mt map[uint64]*msgTrace

	// For non limits policy streams when they process an ack before the actual msg.
	// Can happen in stretch clusters, multi-cloud, or during catchup for a restarted server.
	preAcks map[uint64]map[*consumer]struct{}

	// TODO(dlc) - Hide everything below behind two pointers.
	// Clustered mode.
	sa         *streamAssignment
	node       RaftNode
	catchup    atomic.Bool
	syncSub    *subscription
	infoSub    *subscription
	clMu       sync.Mutex
	clseq      uint64
	clfs       uint64
	inflight   map[uint64]uint64
	leader     string
	lqsent     time.Time
	catchups   map[string]uint64
	uch        chan struct{}
	compressOK bool
	inMonitor  bool

	// Direct get subscription.
	directSub *subscription
	lastBySub *subscription

	monitorWg sync.WaitGroup
}

type sourceInfo struct {
	name  string
	iname string
	cname string
	sub   *subscription
	dsub  *subscription
	lbsub *subscription
	msgs  *ipQueue[*inMsg]
	sseq  uint64
	dseq  uint64
	start time.Time
	lag   uint64
	err   *ApiError
	fails int
	last  time.Time
	lreq  time.Time
	qch   chan struct{}
	sip   bool // setup in progress
	wg    sync.WaitGroup
	sf    string              // subject filter
	sfs   []string            // subject filters
	trs   []*subjectTransform // subject transforms
}

// For mirrors and direct get.
const (
	dgetGroup          = sysGroup
	dgetCaughtUpThresh = 10
)

// Headers for published messages.
const (
	JSMsgId               = "Nats-Msg-Id"
	JSExpectedStream      = "Nats-Expected-Stream"
	JSExpectedLastSeq     = "Nats-Expected-Last-Sequence"
	JSExpectedLastSubjSeq = "Nats-Expected-Last-Subject-Sequence"
	JSExpectedLastMsgId   = "Nats-Expected-Last-Msg-Id"
	JSStreamSource        = "Nats-Stream-Source"
	JSLastConsumerSeq     = "Nats-Last-Consumer"
	JSLastStreamSeq       = "Nats-Last-Stream"
	JSConsumerStalled     = "Nats-Consumer-Stalled"
	JSMsgRollup           = "Nats-Rollup"
	JSMsgSize             = "Nats-Msg-Size"
	JSResponseType        = "Nats-Response-Type"
)

// Headers for republished messages and direct gets.
const (
	JSStream       = "Nats-Stream"
	JSSequence     = "Nats-Sequence"
	JSTimeStamp    = "Nats-Time-Stamp"
	JSSubject      = "Nats-Subject"
	JSLastSequence = "Nats-Last-Sequence"
	JSNumPending   = "Nats-Num-Pending"
	JSUpToSequence = "Nats-UpTo-Sequence"
)
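
// A hypothetical publish illustrating how these headers are used: setting
// Nats-Msg-Id on a published message enables duplicate detection inside the
// stream's configured Duplicates window, and the PubAck for a repeated id
// reports the original sequence with Duplicate set.
//
//	// Using a NATS client (assumed client-side API, not part of this file):
//	// msg.Header.Set(JSMsgId, "order-42")
//	// First publish  -> PubAck{Stream: "ORDERS", Sequence: 1}
//	// Second publish -> PubAck{Stream: "ORDERS", Sequence: 1, Duplicate: true}
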
// Rollups, can be subject only or all messages.
const (
	JSMsgRollupSubject = "sub"
	JSMsgRollupAll     = "all"
)

const (
	jsCreateResponse = "create"
)

// Dedupe entry.
type ddentry struct {
	id  string
	seq uint64
	ts  int64
}

// Replicas Range
const StreamMaxReplicas = 5

// AddStream adds a stream for the given account.
func (a *Account) addStream(config *StreamConfig) (*stream, error) {
	return a.addStreamWithAssignment(config, nil, nil)
}

// AddStreamWithStore adds a stream for the given account with custom store config options.
func (a *Account) addStreamWithStore(config *StreamConfig, fsConfig *FileStoreConfig) (*stream, error) {
	return a.addStreamWithAssignment(config, fsConfig, nil)
}

func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileStoreConfig, sa *streamAssignment) (*stream, error) {
	s, jsa, err := a.checkForJetStream()
	if err != nil {
		return nil, err
	}

	// If we do not have the stream currently assigned to us in cluster mode we will proceed but warn.
	// This can happen on startup with restored state where on meta replay we still do not have
	// the assignment. Running in single server mode this always returns true.
	if !jsa.streamAssigned(config.Name) {
		s.Debugf("Stream '%s > %s' does not seem to be assigned to this server", a.Name, config.Name)
	}

	// Sensible defaults.
	cfg, apiErr := s.checkStreamCfg(config, a)
	if apiErr != nil {
		return nil, apiErr
	}

	singleServerMode := !s.JetStreamIsClustered() && s.standAloneMode()
	if singleServerMode && cfg.Replicas > 1 {
		return nil, ApiErrors[JSStreamReplicasNotSupportedErr]
	}

	// Make sure we are ok when these are done in parallel.
	// We used to call Add(1) in the "else" clause of the "if loaded"
	// statement. This caused a data race because it was possible
	// that one goroutine stores (with count==0) and another goroutine
	// gets "loaded==true" and calls wg.Wait() while the other goroutine
	// then calls wg.Add(1). It also could mean that two goroutines execute
	// the rest of the code concurrently.
	swg := &sync.WaitGroup{}
	swg.Add(1)
	v, loaded := jsa.inflight.LoadOrStore(cfg.Name, swg)
	wg := v.(*sync.WaitGroup)
	if loaded {
		wg.Wait()
		// This waitgroup is "thrown away" (since there was an existing one).
		swg.Done()
	} else {
		defer func() {
			jsa.inflight.Delete(cfg.Name)
			wg.Done()
		}()
	}
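
	// A distilled sketch of the single-flight pattern above (illustrative,
	// not additional logic): pre-incrementing the WaitGroup before
	// LoadOrStore is what makes it safe, since losers of the race only ever
	// Wait() on a group whose Add(1) has already happened.
	//
	//	swg := &sync.WaitGroup{}
	//	swg.Add(1) // must happen before the map can hand the group to anyone
	//	if v, loaded := inflight.LoadOrStore(key, swg); loaded {
	//		v.(*sync.WaitGroup).Wait() // someone else is doing the work
	//	} else {
	//		defer swg.Done() // we own the work; release waiters when done
	//	}
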
	js, isClustered := jsa.jetStreamAndClustered()
	jsa.mu.Lock()
	if mset, ok := jsa.streams[cfg.Name]; ok {
		jsa.mu.Unlock()
		// Check to see if configs are same.
		ocfg := mset.config()

		// Set the index name on cfg, since it would not otherwise contain a value
		// for iname while the value returned from mset.config() does; this
		// ensures the DeepEqual below works.
		for _, s := range cfg.Sources {
			s.setIndexName()
		}

		if reflect.DeepEqual(ocfg, cfg) {
			if sa != nil {
				mset.setStreamAssignment(sa)
			}
			return mset, nil
		} else {
			return nil, ApiErrors[JSStreamNameExistErr]
		}
	}
	jsa.usageMu.RLock()
	selected, tier, hasTier := jsa.selectLimits(&cfg)
	jsa.usageMu.RUnlock()
	reserved := int64(0)
	if !isClustered {
		reserved = jsa.tieredReservation(tier, &cfg)
	}
	jsa.mu.Unlock()

	if !hasTier {
		return nil, NewJSNoLimitsError()
	}
	js.mu.RLock()
	if isClustered {
		_, reserved = tieredStreamAndReservationCount(js.cluster.streams[a.Name], tier, &cfg)
	}
	if err := js.checkAllLimits(&selected, &cfg, reserved, 0); err != nil {
		js.mu.RUnlock()
		return nil, err
	}
	js.mu.RUnlock()
	jsa.mu.Lock()
	// Check for template ownership if present.
	if cfg.Template != _EMPTY_ && jsa.account != nil {
		if !jsa.checkTemplateOwnership(cfg.Template, cfg.Name) {
			jsa.mu.Unlock()
			return nil, fmt.Errorf("stream not owned by template")
		}
	}

	// If mirror, check if the transforms (if any) are valid.
	if cfg.Mirror != nil {
		if len(cfg.Mirror.SubjectTransforms) == 0 {
			if cfg.Mirror.FilterSubject != _EMPTY_ && !IsValidSubject(cfg.Mirror.FilterSubject) {
				jsa.mu.Unlock()
				return nil, fmt.Errorf("subject filter '%s' for the mirror: %w", cfg.Mirror.FilterSubject, ErrBadSubject)
			}
		} else {
			for _, st := range cfg.Mirror.SubjectTransforms {
				if st.Source != _EMPTY_ && !IsValidSubject(st.Source) {
					jsa.mu.Unlock()
					return nil, fmt.Errorf("invalid subject transform source '%s' for the mirror: %w", st.Source, ErrBadSubject)
				}
				// Check the transform, if any, is valid.
				if st.Destination != _EMPTY_ {
					if _, err = NewSubjectTransform(st.Source, st.Destination); err != nil {
						jsa.mu.Unlock()
						return nil, fmt.Errorf("subject transform from '%s' to '%s' for the mirror: %w", st.Source, st.Destination, err)
					}
				}
			}
		}
	}
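
	// An illustrative transform (hypothetical values): NATS subject mapping
	// syntax carries wildcard tokens from source to destination, e.g.
	//
	//	st := SubjectTransformConfig{
	//		Source:      "orders.*.*",
	//		Destination: "archive.{{wildcard(2)}}.{{wildcard(1)}}",
	//	}
	//
	// would map "orders.eu.new" to "archive.new.eu". NewSubjectTransform
	// rejects destinations that do not form a valid mapping.
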
	// Setup our internal indexed names here for sources and check if the transforms (if any) are valid.
	for _, ssi := range cfg.Sources {
		if len(ssi.SubjectTransforms) == 0 {
			// Check the filter, if any, is valid.
			if ssi.FilterSubject != _EMPTY_ && !IsValidSubject(ssi.FilterSubject) {
				jsa.mu.Unlock()
				return nil, fmt.Errorf("subject filter '%s' for the source: %w", ssi.FilterSubject, ErrBadSubject)
			}
		} else {
			for _, st := range ssi.SubjectTransforms {
				if st.Source != _EMPTY_ && !IsValidSubject(st.Source) {
					jsa.mu.Unlock()
					return nil, fmt.Errorf("subject filter '%s' for the source: %w", st.Source, ErrBadSubject)
				}
				// Check the transform, if any, is valid.
				if st.Destination != _EMPTY_ {
					if _, err = NewSubjectTransform(st.Source, st.Destination); err != nil {
						jsa.mu.Unlock()
						return nil, fmt.Errorf("subject transform from '%s' to '%s' for the source: %w", st.Source, st.Destination, err)
					}
				}
			}
		}
	}

	// Check for overlapping subjects with other streams.
	// These are not allowed for now.
	if jsa.subjectsOverlap(cfg.Subjects, nil) {
		jsa.mu.Unlock()
		return nil, NewJSStreamSubjectOverlapError()
	}

	if !hasTier {
		jsa.mu.Unlock()
		return nil, fmt.Errorf("no applicable tier found")
	}

	// Setup the internal clients.
	c := s.createInternalJetStreamClient()
	ic := s.createInternalJetStreamClient()

	qpfx := fmt.Sprintf("[ACC:%s] stream '%s' ", a.Name, config.Name)
	mset := &stream{
		acc:       a,
		jsa:       jsa,
		cfg:       cfg,
		js:        js,
		srv:       s,
		client:    c,
		sysc:      ic,
		tier:      tier,
		stype:     cfg.Storage,
		consumers: make(map[string]*consumer),
		msgs:      newIPQueue[*inMsg](s, qpfx+"messages"),
		gets:      newIPQueue[*directGetReq](s, qpfx+"direct gets"),
		qch:       make(chan struct{}),
		mqch:      make(chan struct{}),
		uch:       make(chan struct{}, 4),
		sch:       make(chan struct{}, 1),
	}

	// Start our signaling routine to process consumers.
	mset.sigq = newIPQueue[*cMsg](s, qpfx+"obs") // of *cMsg
	go mset.signalConsumersLoop()

	// For no-ack consumers when we are interest (or workqueue) retention.
	if cfg.Retention != LimitsPolicy {
		mset.ackq = newIPQueue[uint64](s, qpfx+"acks")
	}

	// Check for input subject transform.
	if cfg.SubjectTransform != nil {
		tr, err := NewSubjectTransform(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination)
		if err != nil {
			jsa.mu.Unlock()
			return nil, fmt.Errorf("stream subject transform from '%s' to '%s': %w", cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination, err)
		}
		mset.itr = tr
	}

	// Check for RePublish.
	if cfg.RePublish != nil {
		tr, err := NewSubjectTransform(cfg.RePublish.Source, cfg.RePublish.Destination)
		if err != nil {
			jsa.mu.Unlock()
			return nil, fmt.Errorf("stream republish transform from '%s' to '%s': %w", cfg.RePublish.Source, cfg.RePublish.Destination, err)
		}
		// Assign our transform for republishing.
		mset.tr = tr
	}
	storeDir := filepath.Join(jsa.storeDir, streamsDir, cfg.Name)
	jsa.mu.Unlock()

	// Bind to the user account.
	c.registerWithAccount(a)
	// Bind to the system account.
	ic.registerWithAccount(s.SystemAccount())

	// Create the appropriate storage.
	fsCfg := fsConfig
	if fsCfg == nil {
		fsCfg = &FileStoreConfig{}
		// If we are file based and not explicitly configured
		// we may be able to auto-tune based on max msgs or bytes.
		if cfg.Storage == FileStorage {
			mset.autoTuneFileStorageBlockSize(fsCfg)
		}
	}
	fsCfg.StoreDir = storeDir
	fsCfg.AsyncFlush = false
	// Grab configured sync interval.
	fsCfg.SyncInterval = s.getOpts().SyncInterval
	fsCfg.SyncAlways = s.getOpts().SyncAlways
	fsCfg.Compression = config.Compression

	if err := mset.setupStore(fsCfg); err != nil {
		mset.stop(true, false)
		return nil, NewJSStreamStoreFailedError(err)
	}

	// Create our pubAck template here. Better than json marshal each time on success.
	if domain := s.getOpts().JetStreamDomain; domain != _EMPTY_ {
		mset.pubAck = []byte(fmt.Sprintf("{%q:%q, %q:%q, %q:", "stream", cfg.Name, "domain", domain, "seq"))
	} else {
		mset.pubAck = []byte(fmt.Sprintf("{%q:%q, %q:", "stream", cfg.Name, "seq"))
	}
	end := len(mset.pubAck)
	mset.pubAck = mset.pubAck[:end:end]

	// Set our known last sequence.
	var state StreamState
	mset.store.FastState(&state)

	// Possible race with consumer.setLeader during recovery.
	mset.mu.RLock()
	mset.lseq = state.LastSeq
	mset.mu.RUnlock()

	// If no msgs (new stream), set dedupe state loaded to true.
	if state.Msgs == 0 {
		mset.ddloaded = true
	}

	// Set our stream assignment if in clustered mode.
	if sa != nil {
		mset.setStreamAssignment(sa)
	}

	// Setup our internal send goroutine.
	mset.setupSendCapabilities()

	// Reserve resources if MaxBytes present.
	mset.js.reserveStreamResources(&mset.cfg)

	// Call directly to set leader if not in clustered mode.
	// This can be called before we actually set up clustering though, so check both.
	if singleServerMode {
		if err := mset.setLeader(true); err != nil {
			mset.stop(true, false)
			return nil, err
		}
	}

	// This is always true in single server mode.
	if mset.IsLeader() {
		// Send advisory.
		var suppress bool
		if !s.standAloneMode() && sa == nil {
			if cfg.Replicas > 1 {
				suppress = true
			}
		} else if sa != nil {
			suppress = sa.responded
		}
		if !suppress {
			mset.sendCreateAdvisory()
		}
	}

	// Register with our account last.
	jsa.mu.Lock()
	jsa.streams[cfg.Name] = mset
	jsa.mu.Unlock()

	return mset, nil
}

// Composes the index name, which contains the stream name, subject filter, and
// transform destination. When the stream is external we use additional
// information in case the (external) stream names are the same.
func (ssi *StreamSource) composeIName() string {
	var iName = ssi.Name

	if ssi.External != nil {
		iName = iName + ":" + getHash(ssi.External.ApiPrefix)
	}

	source := ssi.FilterSubject
	destination := fwcs

	if len(ssi.SubjectTransforms) == 0 {
		// Normalize filter and destination in case they are empty.
		if source == _EMPTY_ {
			source = fwcs
		}
		if destination == _EMPTY_ {
			destination = fwcs
		}
	} else {
		var sources, destinations []string

		for _, tr := range ssi.SubjectTransforms {
			trsrc, trdest := tr.Source, tr.Destination
			if trsrc == _EMPTY_ {
				trsrc = fwcs
			}
			if trdest == _EMPTY_ {
				trdest = fwcs
			}
			sources = append(sources, trsrc)
			destinations = append(destinations, trdest)
		}
		source = strings.Join(sources, "\f")
		destination = strings.Join(destinations, "\f")
	}

	return strings.Join([]string{iName, source, destination}, " ")
}
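
// An illustrative composition (hypothetical values): a non-external source
// with Name "ORDERS", FilterSubject "orders.eu.>" and no transforms yields
// the index name
//
//	"ORDERS orders.eu.> >"
//
// since the destination defaults to the full wildcard. Multiple transforms
// join their sources and destinations with "\f" before the final join.
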
// Sets the index name.
func (ssi *StreamSource) setIndexName() {
	ssi.iname = ssi.composeIName()
}

func (mset *stream) streamAssignment() *streamAssignment {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.sa
}

func (mset *stream) setStreamAssignment(sa *streamAssignment) {
	var node RaftNode
	var peers []string

	mset.mu.RLock()
	js := mset.js
	mset.mu.RUnlock()

	if js != nil {
		js.mu.RLock()
		if sa.Group != nil {
			node = sa.Group.node
			peers = sa.Group.Peers
		}
		js.mu.RUnlock()
	}

	mset.mu.Lock()
	defer mset.mu.Unlock()

	mset.sa = sa
	if sa == nil {
		return
	}

	// Set our node.
	mset.node = node
	if mset.node != nil {
		mset.node.UpdateKnownPeers(peers)
	}

	// Setup our info sub here as well for all stream members. This is now by design.
	if mset.infoSub == nil {
		isubj := fmt.Sprintf(clusterStreamInfoT, mset.jsa.acc(), mset.cfg.Name)
		// Note below the way we subscribe here is so that we can send requests to ourselves.
		mset.infoSub, _ = mset.srv.systemSubscribe(isubj, _EMPTY_, false, mset.sysc, mset.handleClusterStreamInfoRequest)
	}

	// Trigger update chan.
	select {
	case mset.uch <- struct{}{}:
	default:
	}
}

func (mset *stream) monitorQuitC() <-chan struct{} {
	if mset == nil {
		return nil
	}
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.mqch
}

func (mset *stream) updateC() <-chan struct{} {
	if mset == nil {
		return nil
	}
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.uch
}

// IsLeader will return if we are the current leader.
func (mset *stream) IsLeader() bool {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.isLeader()
}

// Lock should be held.
func (mset *stream) isLeader() bool {
	if mset.isClustered() {
		return mset.node.Leader()
	}
	return true
}

// TODO(dlc) - Check to see if we can accept being the leader or we should step down.
func (mset *stream) setLeader(isLeader bool) error {
	mset.mu.Lock()
	// If we are here we have a change in leader status.
	if isLeader {
		// Make sure we are listening for sync requests.
		// TODO(dlc) - Original design was that all in sync members of the group would do DQ.
		mset.startClusterSubs()
		// Setup subscriptions.
		if err := mset.subscribeToStream(); err != nil {
			mset.mu.Unlock()
			return err
		}
	} else {
		// Cancel the timer to create the source consumers if it has not fired yet.
		if mset.sourcesConsumerSetup != nil {
			mset.sourcesConsumerSetup.Stop()
			mset.sourcesConsumerSetup = nil
		}
		// Stop responding to sync requests.
		mset.stopClusterSubs()
		// Unsubscribe from direct stream.
		mset.unsubscribeToStream(false)
		// Clear catchup state.
		mset.clearAllCatchupPeers()
	}
	// Track group leader.
	if mset.isClustered() {
		mset.leader = mset.node.GroupLeader()
	} else {
		mset.leader = _EMPTY_
	}
	mset.mu.Unlock()

	// If we are interest based make sure to check consumers.
	// This is to make sure we process any outstanding acks.
	mset.checkInterestState()

	return nil
}
// Lock should be held.
func (mset *stream) startClusterSubs() {
	if mset.isClustered() && mset.syncSub == nil {
		mset.syncSub, _ = mset.srv.systemSubscribe(mset.sa.Sync, _EMPTY_, false, mset.sysc, mset.handleClusterSyncRequest)
	}
}

// Lock should be held.
func (mset *stream) stopClusterSubs() {
	if mset.syncSub != nil {
		mset.srv.sysUnsubscribe(mset.syncSub)
		mset.syncSub = nil
	}
}

// account gets the account for this stream.
func (mset *stream) account() *Account {
	mset.mu.RLock()
	jsa := mset.jsa
	mset.mu.RUnlock()
	if jsa == nil {
		return nil
	}
	return jsa.acc()
}

// Helper to determine the max msg size for this stream if file based.
func (mset *stream) maxMsgSize() uint64 {
	maxMsgSize := mset.cfg.MaxMsgSize
	if maxMsgSize <= 0 {
		// Pull from the account.
		if mset.jsa != nil {
			if acc := mset.jsa.acc(); acc != nil {
				acc.mu.RLock()
				maxMsgSize = acc.mpay
				acc.mu.RUnlock()
			}
		}
		// If all else fails use default.
		if maxMsgSize <= 0 {
			maxMsgSize = MAX_PAYLOAD_SIZE
		}
	}
	// Now determine an estimation for the subjects etc.
	maxSubject := -1
	for _, subj := range mset.cfg.Subjects {
		if subjectIsLiteral(subj) {
			if len(subj) > maxSubject {
				maxSubject = len(subj)
			}
		}
	}
	if maxSubject < 0 {
		const defaultMaxSubject = 256
		maxSubject = defaultMaxSubject
	}
	// Filestore will add in estimates for record headers, etc.
	return fileStoreMsgSizeEstimate(maxSubject, int(maxMsgSize))
}

// If we are file based and the file storage config was not explicitly set
// we can autotune block sizes to better match. Our target will be to store 125%
// of the theoretical limit. We will round up to the nearest 100 bytes as well.
func (mset *stream) autoTuneFileStorageBlockSize(fsCfg *FileStoreConfig) {
	var totalEstSize uint64

	// MaxBytes will take precedence for now.
	if mset.cfg.MaxBytes > 0 {
		totalEstSize = uint64(mset.cfg.MaxBytes)
	} else if mset.cfg.MaxMsgs > 0 {
		// Determine max message size to estimate.
		totalEstSize = mset.maxMsgSize() * uint64(mset.cfg.MaxMsgs)
	} else if mset.cfg.MaxMsgsPer > 0 {
		fsCfg.BlockSize = uint64(defaultKVBlockSize)
		return
	} else {
		// If nothing is set we will let the underlying filestore determine blkSize.
		return
	}

	blkSize := (totalEstSize / 4) + 1 // (25% overhead)
	// Round up to nearest 100.
	if m := blkSize % 100; m != 0 {
		blkSize += 100 - m
	}
	if blkSize <= FileStoreMinBlkSize {
		blkSize = FileStoreMinBlkSize
	} else if blkSize >= FileStoreMaxBlkSize {
		blkSize = FileStoreMaxBlkSize
	} else {
		blkSize = defaultMediumBlockSize
	}
	fsCfg.BlockSize = uint64(blkSize)
}
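
// A worked example of the sizing above (illustrative numbers): with
// MaxBytes = 1,000,000 the initial estimate is 1,000,000/4 + 1 = 250,001,
// rounded up to 250,100. The estimate is then compared against the filestore
// minimum and maximum block sizes; anything in between collapses to
// defaultMediumBlockSize, and only out-of-range estimates are pinned to
// FileStoreMinBlkSize or FileStoreMaxBlkSize.
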
// rebuildDedupe will rebuild any dedupe structures needed after recovery of a stream.
// Will be called lazily to avoid penalizing startup times.
// TODO(dlc) - Might be good to know if this should be checked at all for streams with no
// headers and msgId in them. Would need signaling from the storage layer.
// Lock should be held.
func (mset *stream) rebuildDedupe() {
	if mset.ddloaded {
		return
	}

	mset.ddloaded = true

	// We have some messages. Lookup starting sequence by duplicate time window.
	sseq := mset.store.GetSeqFromTime(time.Now().Add(-mset.cfg.Duplicates))
	if sseq == 0 {
		return
	}

	var smv StoreMsg
	var state StreamState
	mset.store.FastState(&state)

	for seq := sseq; seq <= state.LastSeq; seq++ {
		sm, err := mset.store.LoadMsg(seq, &smv)
		if err != nil {
			continue
		}
		var msgId string
		if len(sm.hdr) > 0 {
			if msgId = getMsgId(sm.hdr); msgId != _EMPTY_ {
				mset.storeMsgIdLocked(&ddentry{msgId, sm.seq, sm.ts})
			}
		}
		if seq == state.LastSeq {
			mset.lmsgId = msgId
		}
	}
}

func (mset *stream) lastSeqAndCLFS() (uint64, uint64) {
	return mset.lastSeq(), mset.getCLFS()
}

func (mset *stream) getCLFS() uint64 {
	mset.clMu.Lock()
	defer mset.clMu.Unlock()
	return mset.clfs
}

func (mset *stream) setCLFS(clfs uint64) {
	mset.clMu.Lock()
	mset.clfs = clfs
	mset.clMu.Unlock()
}

func (mset *stream) lastSeq() uint64 {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.lseq
}

func (mset *stream) setLastSeq(lseq uint64) {
	mset.mu.Lock()
	mset.lseq = lseq
	mset.mu.Unlock()
}

func (mset *stream) sendCreateAdvisory() {
	mset.mu.RLock()
	name := mset.cfg.Name
	template := mset.cfg.Template
	outq := mset.outq
	srv := mset.srv
	mset.mu.RUnlock()

	if outq == nil {
		return
	}

	// Finally send an event that this stream was created.
	m := JSStreamActionAdvisory{
		TypedEvent: TypedEvent{
			Type: JSStreamActionAdvisoryType,
			ID:   nuid.Next(),
			Time: time.Now().UTC(),
		},
		Stream:   name,
		Action:   CreateEvent,
		Template: template,
		Domain:   srv.getOpts().JetStreamDomain,
	}

	j, err := json.Marshal(m)
	if err != nil {
		return
	}

	subj := JSAdvisoryStreamCreatedPre + "." + name
	outq.sendMsg(subj, j)
}
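
// A sketch of the advisory published above (field values are hypothetical;
// the subject prefix comes from JSAdvisoryStreamCreatedPre and the exact
// wire shape is whatever JSStreamActionAdvisory marshals to):
//
//	// Subject: <JSAdvisoryStreamCreatedPre>.ORDERS
//	// {
//	//   "type": "io.nats.jetstream.advisory.v1.stream_action",
//	//   "id": "<nuid>",
//	//   "timestamp": "2024-01-01T00:00:00Z",
//	//   "stream": "ORDERS",
//	//   "action": "create"
//	// }
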
func (mset *stream) sendDeleteAdvisoryLocked() {
	if mset.outq == nil {
		return
	}

	m := JSStreamActionAdvisory{
		TypedEvent: TypedEvent{
			Type: JSStreamActionAdvisoryType,
			ID:   nuid.Next(),
			Time: time.Now().UTC(),
		},
		Stream:   mset.cfg.Name,
		Action:   DeleteEvent,
		Template: mset.cfg.Template,
		Domain:   mset.srv.getOpts().JetStreamDomain,
	}

	j, err := json.Marshal(m)
	if err == nil {
		subj := JSAdvisoryStreamDeletedPre + "." + mset.cfg.Name
		mset.outq.sendMsg(subj, j)
	}
}

func (mset *stream) sendUpdateAdvisoryLocked() {
	if mset.outq == nil {
		return
	}

	m := JSStreamActionAdvisory{
		TypedEvent: TypedEvent{
			Type: JSStreamActionAdvisoryType,
			ID:   nuid.Next(),
			Time: time.Now().UTC(),
		},
		Stream: mset.cfg.Name,
		Action: ModifyEvent,
		Domain: mset.srv.getOpts().JetStreamDomain,
	}

	j, err := json.Marshal(m)
	if err == nil {
		subj := JSAdvisoryStreamUpdatedPre + "." + mset.cfg.Name
		mset.outq.sendMsg(subj, j)
	}
}

// Created returns created time.
func (mset *stream) createdTime() time.Time {
	mset.mu.RLock()
	created := mset.created
	mset.mu.RUnlock()
	return created
}

// Internal to allow creation time to be restored.
func (mset *stream) setCreatedTime(created time.Time) {
	mset.mu.Lock()
	mset.created = created
	mset.mu.Unlock()
}

// subjectsOverlap to see if these subjects overlap with existing subjects.
// Use only for non-clustered JetStream.
// RLock minimum should be held.
func (jsa *jsAccount) subjectsOverlap(subjects []string, self *stream) bool {
	for _, mset := range jsa.streams {
		if self != nil && mset == self {
			continue
		}
		for _, subj := range mset.cfg.Subjects {
			for _, tsubj := range subjects {
				if SubjectsCollide(tsubj, subj) {
					return true
				}
			}
		}
	}
	return false
}

// StreamDefaultDuplicatesWindow default duplicates window.
const StreamDefaultDuplicatesWindow = 2 * time.Minute

func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account) (StreamConfig, *ApiError) {
	lim := &s.getOpts().JetStreamLimits

	if config == nil {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration invalid"))
	}
	if !isValidName(config.Name) {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream name is required and can not contain '.', '*', '>'"))
	}
	if len(config.Name) > JSMaxNameLen {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream name is too long, maximum allowed is %d", JSMaxNameLen))
	}
	if len(config.Description) > JSMaxDescriptionLen {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream description is too long, maximum allowed is %d", JSMaxDescriptionLen))
	}

	var metadataLen int
	for k, v := range config.Metadata {
		metadataLen += len(k) + len(v)
	}
	if metadataLen > JSMaxMetadataLen {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream metadata exceeds maximum size of %d bytes", JSMaxMetadataLen))
	}

	cfg := *config

	// Make file the default.
	if cfg.Storage == 0 {
		cfg.Storage = FileStorage
	}
	if cfg.Replicas == 0 {
		cfg.Replicas = 1
	}
	if cfg.Replicas > StreamMaxReplicas {
		return cfg, NewJSStreamInvalidConfigError(fmt.Errorf("maximum replicas is %d", StreamMaxReplicas))
	}
	if cfg.Replicas < 0 {
		return cfg, NewJSReplicasCountCannotBeNegativeError()
	}
	if cfg.MaxMsgs == 0 {
		cfg.MaxMsgs = -1
	}
	if cfg.MaxMsgsPer == 0 {
		cfg.MaxMsgsPer = -1
	}
	if cfg.MaxBytes == 0 {
		cfg.MaxBytes = -1
	}
	if cfg.MaxMsgSize == 0 {
		cfg.MaxMsgSize = -1
	}
	if cfg.MaxConsumers == 0 {
		cfg.MaxConsumers = -1
	}
	if cfg.Duplicates == 0 && cfg.Mirror == nil {
		maxWindow := StreamDefaultDuplicatesWindow
		if lim.Duplicates > 0 && maxWindow > lim.Duplicates {
			maxWindow = lim.Duplicates
		}
		if cfg.MaxAge != 0 && cfg.MaxAge < maxWindow {
			cfg.Duplicates = cfg.MaxAge
		} else {
			cfg.Duplicates = maxWindow
		}
	}
	if cfg.MaxAge > 0 && cfg.MaxAge < 100*time.Millisecond {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("max age needs to be >= 100ms"))
	}
	if cfg.Duplicates < 0 {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window can not be negative"))
	}
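
	// Worked example of the defaulting above (illustrative values): with
	// Duplicates unset and MaxAge = 30s, the duplicates window becomes 30s,
	// since it may never exceed MaxAge; with MaxAge = 1h it becomes the two
	// minute StreamDefaultDuplicatesWindow (or the server limit if lower).
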
	// Check that duplicates is not larger than age if set.
	if cfg.MaxAge != 0 && cfg.Duplicates > cfg.MaxAge {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window can not be larger than max age"))
	}
	if lim.Duplicates > 0 && cfg.Duplicates > lim.Duplicates {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window can not be larger than server limit of %v",
			lim.Duplicates.String()))
	}
	if cfg.Duplicates > 0 && cfg.Duplicates < 100*time.Millisecond {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window needs to be >= 100ms"))
	}

	if cfg.DenyPurge && cfg.AllowRollup {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("roll-ups require the purge permission"))
	}

	// Check for discard new per subject; we require the discard policy to also be new.
	if cfg.DiscardNewPer {
		if cfg.Discard != DiscardNew {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires discard new policy to be set"))
		}
		if cfg.MaxMsgsPer <= 0 {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires max msgs per subject > 0"))
		}
	}

	getStream := func(streamName string) (bool, StreamConfig) {
		var exists bool
		var cfg StreamConfig
		if s.JetStreamIsClustered() {
			if js, _ := s.getJetStreamCluster(); js != nil {
				js.mu.RLock()
				if sa := js.streamAssignment(acc.Name, streamName); sa != nil {
					cfg = *sa.Config
					exists = true
				}
				js.mu.RUnlock()
			}
		} else if mset, err := acc.lookupStream(streamName); err == nil {
			cfg = mset.cfg
			exists = true
		}
		return exists, cfg
	}

	hasStream := func(streamName string) (bool, int32, []string) {
		exists, cfg := getStream(streamName)
		return exists, cfg.MaxMsgSize, cfg.Subjects
	}

	var streamSubs []string
	var deliveryPrefixes []string
	var apiPrefixes []string

	// Do some pre-checking for mirror config to avoid cycles in clustered mode.
	if cfg.Mirror != nil {
		if cfg.FirstSeq > 0 {
			return StreamConfig{}, NewJSMirrorWithFirstSeqError()
		}
		if len(cfg.Subjects) > 0 {
			return StreamConfig{}, NewJSMirrorWithSubjectsError()
		}
		if len(cfg.Sources) > 0 {
			return StreamConfig{}, NewJSMirrorWithSourcesError()
		}
		if cfg.Mirror.FilterSubject != _EMPTY_ && len(cfg.Mirror.SubjectTransforms) != 0 {
			return StreamConfig{}, NewJSMirrorMultipleFiltersNotAllowedError()
		}
		// Check subject filters overlap.
		for outer, tr := range cfg.Mirror.SubjectTransforms {
			if !IsValidSubject(tr.Source) {
				return StreamConfig{}, NewJSMirrorInvalidSubjectFilterError()
			}
			for inner, innertr := range cfg.Mirror.SubjectTransforms {
				if inner != outer && SubjectsCollide(tr.Source, innertr.Source) {
					return StreamConfig{}, NewJSMirrorOverlappingSubjectFiltersError()
				}
			}
		}
		// Do not perform checks if External is provided, as it could lead to
		// checking against itself (if the sourced stream name is the same on a different JetStream).
		if cfg.Mirror.External == nil {
			if !isValidName(cfg.Mirror.Name) {
				return StreamConfig{}, NewJSMirrorInvalidStreamNameError()
			}
			// We do not require the other stream to exist anymore, but if we can see it check payloads.
			exists, maxMsgSize, subs := hasStream(cfg.Mirror.Name)
			if len(subs) > 0 {
				streamSubs = append(streamSubs, subs...)
			}
			if exists {
				if cfg.MaxMsgSize > 0 && maxMsgSize > 0 && cfg.MaxMsgSize < maxMsgSize {
					return StreamConfig{}, NewJSMirrorMaxMessageSizeTooBigError()
				}
			}
			// Determine if we are inheriting direct gets.
			if exists, ocfg := getStream(cfg.Mirror.Name); exists {
				cfg.MirrorDirect = ocfg.AllowDirect
			} else if js := s.getJetStream(); js != nil && js.isClustered() {
				// Could not find it here. If we are clustered we can look it up.
				js.mu.RLock()
				if cc := js.cluster; cc != nil {
					if as := cc.streams[acc.Name]; as != nil {
						if sa := as[cfg.Mirror.Name]; sa != nil {
							cfg.MirrorDirect = sa.Config.AllowDirect
						}
					}
				}
				js.mu.RUnlock()
			}
		} else {
			if cfg.Mirror.External.DeliverPrefix != _EMPTY_ {
				deliveryPrefixes = append(deliveryPrefixes, cfg.Mirror.External.DeliverPrefix)
			}
			if cfg.Mirror.External.ApiPrefix != _EMPTY_ {
				apiPrefixes = append(apiPrefixes, cfg.Mirror.External.ApiPrefix)
			}
		}
	}

	// Check for duplicates.
	var iNames = make(map[string]struct{})
	for _, src := range cfg.Sources {
		if !isValidName(src.Name) {
			return StreamConfig{}, NewJSSourceInvalidStreamNameError()
		}
		if _, ok := iNames[src.composeIName()]; !ok {
			iNames[src.composeIName()] = struct{}{}
		} else {
			return StreamConfig{}, NewJSSourceDuplicateDetectedError()
		}
		// Do not perform checks if External is provided, as it could lead to
		// checking against itself (if the sourced stream name is the same on a different JetStream).
		if src.External == nil {
			exists, maxMsgSize, subs := hasStream(src.Name)
			if len(subs) > 0 {
				streamSubs = append(streamSubs, subs...)
			}
			if exists {
				if cfg.MaxMsgSize > 0 && maxMsgSize > 0 && cfg.MaxMsgSize < maxMsgSize {
					return StreamConfig{}, NewJSSourceMaxMessageSizeTooBigError()
				}
			}

			if src.FilterSubject != _EMPTY_ && len(src.SubjectTransforms) != 0 {
				return StreamConfig{}, NewJSSourceMultipleFiltersNotAllowedError()
			}

			for _, tr := range src.SubjectTransforms {
				err := ValidateMappingDestination(tr.Destination)
				if err != nil {
					return StreamConfig{}, NewJSSourceInvalidTransformDestinationError()
				}
			}
			// Check subject filters overlap.
			for outer, tr := range src.SubjectTransforms {
				if !IsValidSubject(tr.Source) {
					return StreamConfig{}, NewJSSourceInvalidSubjectFilterError()
				}
				for inner, innertr := range src.SubjectTransforms {
					if inner != outer && subjectIsSubsetMatch(tr.Source, innertr.Source) {
						return StreamConfig{}, NewJSSourceOverlappingSubjectFiltersError()
					}
				}
			}
			continue
		} else {
			if src.External.DeliverPrefix != _EMPTY_ {
				deliveryPrefixes = append(deliveryPrefixes, src.External.DeliverPrefix)
			}
			if src.External.ApiPrefix != _EMPTY_ {
				apiPrefixes = append(apiPrefixes, src.External.ApiPrefix)
			}
		}
	}

	// Check prefix overlap with subjects.
	for _, pfx := range deliveryPrefixes {
		if !IsValidPublishSubject(pfx) {
			return StreamConfig{}, NewJSStreamInvalidExternalDeliverySubjError(pfx)
		}
		for _, sub := range streamSubs {
			if SubjectsCollide(sub, fmt.Sprintf("%s.%s", pfx, sub)) {
				return StreamConfig{}, NewJSStreamExternalDelPrefixOverlapsError(pfx, sub)
			}
		}
	}
	// Check if api prefixes overlap.
	for _, apiPfx := range apiPrefixes {
		if !IsValidPublishSubject(apiPfx) {
			return StreamConfig{}, NewJSStreamInvalidConfigError(
				fmt.Errorf("stream external api prefix %q must be a valid subject without wildcards", apiPfx))
		}
		if SubjectsCollide(apiPfx, JSApiPrefix) {
			return StreamConfig{}, NewJSStreamExternalApiOverlapError(apiPfx, JSApiPrefix)
		}
	}

	// Cycle check for source cycles.
	toVisit := []*StreamConfig{&cfg}
	visited := make(map[string]struct{})
	overlaps := func(subjects []string, filter string) bool {
		if filter == _EMPTY_ {
			return true
		}
		for _, subject := range subjects {
			if SubjectsCollide(subject, filter) {
				return true
			}
		}
		return false
	}

	for len(toVisit) > 0 {
		cfg := toVisit[0]
		toVisit = toVisit[1:]
		visited[cfg.Name] = struct{}{}
		for _, src := range cfg.Sources {
			if src.External != nil {
				continue
			}
			// We can detect a cycle between streams, but let's double check that the
			// subjects actually form a cycle.
			if _, ok := visited[src.Name]; ok {
				if overlaps(cfg.Subjects, src.FilterSubject) {
					return StreamConfig{}, NewJSStreamInvalidConfigError(errors.New("detected cycle"))
				}
			} else if exists, cfg := getStream(src.Name); exists {
				toVisit = append(toVisit, &cfg)
			}
		}
		// Avoid cycles hiding behind mirrors.
		if m := cfg.Mirror; m != nil {
			if m.External == nil {
				if _, ok := visited[m.Name]; ok {
					return StreamConfig{}, NewJSStreamInvalidConfigError(errors.New("detected cycle"))
				}
				if exists, cfg := getStream(m.Name); exists {
					toVisit = append(toVisit, &cfg)
				}
			}
		}
	}
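
	// An illustrative cycle the walk above rejects (hypothetical streams):
	// stream A sources stream B and stream B sources stream A, both without
	// filter subjects. Visiting A enqueues B; visiting B rediscovers A in
	// visited, and the empty filter counts as overlapping, so the config
	// fails with "detected cycle". Disjoint filters do not trip the check.
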
	if len(cfg.Subjects) == 0 {
		if cfg.Mirror == nil && len(cfg.Sources) == 0 {
			cfg.Subjects = append(cfg.Subjects, cfg.Name)
		}
	} else {
		if cfg.Mirror != nil {
			return StreamConfig{}, NewJSMirrorWithSubjectsError()
		}

		// Check for literal duplication of subject interest in config
		// and no overlap with any JS API subject space.
		dset := make(map[string]struct{}, len(cfg.Subjects))
		for _, subj := range cfg.Subjects {
			if _, ok := dset[subj]; ok {
				return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicate subjects detected"))
			}
			// Also check to make sure we do not overlap with our $JS API subjects.
			if subjectIsSubsetMatch(subj, "$JS.API.>") {
				return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects overlap with jetstream api"))
			}
			// Make sure the subject is valid.
			if !IsValidSubject(subj) {
				return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("invalid subject"))
			}
			// Mark for duplicate check.
			dset[subj] = struct{}{}
		}
	}

	if len(cfg.Subjects) == 0 && len(cfg.Sources) == 0 && cfg.Mirror == nil {
		return StreamConfig{}, NewJSStreamInvalidConfigError(
			fmt.Errorf("stream needs at least one configured subject or be a source/mirror"))
	}

	// Check for MaxBytes being required and its limit.
	if required, limit := acc.maxBytesLimits(&cfg); required && cfg.MaxBytes <= 0 {
		return StreamConfig{}, NewJSStreamMaxBytesRequiredError()
	} else if limit > 0 && cfg.MaxBytes > limit {
		return StreamConfig{}, NewJSStreamMaxStreamBytesExceededError()
	}

	// Now check, if we have multiple subjects, that we do not overlap with ourselves,
	// which would cause duplicate entries (assuming no MsgID).
	if len(cfg.Subjects) > 1 {
		for _, subj := range cfg.Subjects {
			for _, tsubj := range cfg.Subjects {
				if tsubj != subj && SubjectsCollide(tsubj, subj) {
					return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subject %q overlaps with %q", subj, tsubj))
				}
			}
		}
	}

	// If we have a republish directive check if we can create a transform here.
	if cfg.RePublish != nil {
		// Check to make sure source is a valid subset of the subjects we have.
		// Also make sure it does not form a cycle.
		// Empty is the same as all.
		if cfg.RePublish.Source == _EMPTY_ {
			cfg.RePublish.Source = fwcs
		}
		var formsCycle bool
		for _, subj := range cfg.Subjects {
			if SubjectsCollide(cfg.RePublish.Destination, subj) {
				formsCycle = true
				break
			}
		}
		if formsCycle {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration for republish destination forms a cycle"))
		}
		if _, err := NewSubjectTransform(cfg.RePublish.Source, cfg.RePublish.Destination); err != nil {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration for republish with transform from '%s' to '%s' not valid", cfg.RePublish.Source, cfg.RePublish.Destination))
		}
	}

	return cfg, nil
}

// Config returns the stream's configuration.
func (mset *stream) config() StreamConfig {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.cfg
}

func (mset *stream) fileStoreConfig() (FileStoreConfig, error) {
	mset.mu.Lock()
	defer mset.mu.Unlock()
	fs, ok := mset.store.(*fileStore)
	if !ok {
		return FileStoreConfig{}, ErrStoreWrongType
	}
	return fs.fileStoreConfig(), nil
}

// Do not hold the jsAccount or jetStream lock.
func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server) (*StreamConfig, error) {
	cfg, apiErr := s.checkStreamCfg(new, jsa.acc())
	if apiErr != nil {
		return nil, apiErr
	}

	// Name must match.
	if cfg.Name != old.Name {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration name must match original"))
	}
	// Can't change MaxConsumers for now.
	if cfg.MaxConsumers != old.MaxConsumers {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change MaxConsumers"))
	}
	// Can't change storage types.
	if cfg.Storage != old.Storage {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change storage type"))
	}
	// Can only change retention from limits to interest or back, not to/from work queue for now.
	if cfg.Retention != old.Retention {
		if old.Retention == WorkQueuePolicy || cfg.Retention == WorkQueuePolicy {
			return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change retention policy to/from workqueue"))
		}
	}
	// Can not have a template owner for now.
	if old.Template != _EMPTY_ {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update not allowed on template owned stream"))
	}
	if cfg.Template != _EMPTY_ {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not be owned by a template"))
	}
	// Can not change from true to false.
	if !cfg.Sealed && old.Sealed {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not unseal a sealed stream"))
	}
	// Can not change from true to false.
	if !cfg.DenyDelete && old.DenyDelete {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not cancel deny message deletes"))
	}
	// Can not change from true to false.
	if !cfg.DenyPurge && old.DenyPurge {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not cancel deny purge"))
	}
	// Check for mirror changes which are not allowed.
	if !reflect.DeepEqual(cfg.Mirror, old.Mirror) {
		return nil, NewJSStreamMirrorNotUpdatableError()
	}

	// Check on new discard new per subject.
	if cfg.DiscardNewPer {
		if cfg.Discard != DiscardNew {
			return nil, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires discard new policy to be set"))
		}
		if cfg.MaxMsgsPer <= 0 {
			return nil, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires max msgs per subject > 0"))
		}
	}

	// Do some adjustments for being sealed.
	if cfg.Sealed {
		cfg.MaxAge = 0
		cfg.Discard = DiscardNew
		cfg.DenyDelete, cfg.DenyPurge = true, true
		cfg.AllowRollup = false
	}

	// Check limits. We need some extra handling to allow updating MaxBytes.

	// First, let's calculate the difference between the new and old MaxBytes.
	maxBytesDiff := cfg.MaxBytes - old.MaxBytes
	if maxBytesDiff < 0 {
		// If we're updating to a lower MaxBytes (maxBytesDiff is negative),
		// then set to zero so checkBytesLimits doesn't set addBytes to 1.
		maxBytesDiff = 0
	}
	// If maxBytesDiff == 0, then that means MaxBytes didn't change.
	// If maxBytesDiff > 0, then we want to reserve additional bytes.

	// Save the user configured MaxBytes.
	newMaxBytes := cfg.MaxBytes
	maxBytesOffset := int64(0)

	// We temporarily set cfg.MaxBytes to maxBytesDiff because checkAllLimits
	// adds cfg.MaxBytes to the current reserved limit and checks if we've gone
	// over. However, we don't want an additional cfg.MaxBytes, we only want to
	// reserve the difference between the new and the old values.
	cfg.MaxBytes = maxBytesDiff

	// Check limits.
	js, isClustered := jsa.jetStreamAndClustered()
	jsa.mu.RLock()
	acc := jsa.account
	jsa.usageMu.RLock()
	selected, tier, hasTier := jsa.selectLimits(&cfg)
	if !hasTier && old.Replicas != cfg.Replicas {
		selected, tier, hasTier = jsa.selectLimits(old)
	}
	jsa.usageMu.RUnlock()
	reserved := int64(0)
	if !isClustered {
		reserved = jsa.tieredReservation(tier, &cfg)
	}
	jsa.mu.RUnlock()
	if !hasTier {
		return nil, NewJSNoLimitsError()
	}
	js.mu.RLock()
	defer js.mu.RUnlock()
	if isClustered {
		_, reserved = tieredStreamAndReservationCount(js.cluster.streams[acc.Name], tier, &cfg)
	}
	// The reservation does not account for this stream, hence add the old value.
	if tier == _EMPTY_ && old.Replicas > 1 {
		reserved += old.MaxBytes * int64(old.Replicas)
	} else {
		reserved += old.MaxBytes
	}
	if err := js.checkAllLimits(&selected, &cfg, reserved, maxBytesOffset); err != nil {
		return nil, err
	}
	// Restore the user configured MaxBytes.
	cfg.MaxBytes = newMaxBytes
	return &cfg, nil
}
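
// A worked example of the MaxBytes handling above (illustrative numbers):
// growing a stream from MaxBytes 1GB to 4GB yields maxBytesDiff = 3GB, so
// only the 3GB delta is checked against the remaining reservation; shrinking
// from 4GB to 1GB clamps the diff to 0, since releasing bytes can never
// violate a limit. The user configured cfg.MaxBytes is restored before
// returning either way.
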
// Update will allow certain configuration properties of an existing stream to be updated.
func (mset *stream) update(config *StreamConfig) error {
	return mset.updateWithAdvisory(config, true)
}

// updateWithAdvisory will allow certain configuration properties of an existing
// stream to be updated, optionally sending an update advisory.
func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool) error {
	_, jsa, err := mset.acc.checkForJetStream()
	if err != nil {
		return err
	}

	mset.mu.RLock()
	ocfg := mset.cfg
	s := mset.srv
	mset.mu.RUnlock()

	cfg, err := mset.jsa.configUpdateCheck(&ocfg, config, s)
	if err != nil {
		return NewJSStreamInvalidConfigError(err, Unless(err))
	}

	// In the event that some of the stream-level limits have changed, yell appropriately
	// if any of the consumers exceed that limit.
	updateLimits := ocfg.ConsumerLimits.InactiveThreshold != cfg.ConsumerLimits.InactiveThreshold ||
		ocfg.ConsumerLimits.MaxAckPending != cfg.ConsumerLimits.MaxAckPending
	if updateLimits {
		var errorConsumers []string
		consumers := map[string]*ConsumerConfig{}
		if mset.js.isClustered() {
			for _, c := range mset.sa.consumers {
				consumers[c.Name] = c.Config
			}
		} else {
			for _, c := range mset.consumers {
				consumers[c.name] = &c.cfg
			}
		}
		for name, ccfg := range consumers {
			if ccfg.InactiveThreshold > cfg.ConsumerLimits.InactiveThreshold ||
				ccfg.MaxAckPending > cfg.ConsumerLimits.MaxAckPending {
				errorConsumers = append(errorConsumers, name)
			}
		}
		if len(errorConsumers) > 0 {
			// TODO(nat): Return a parsable error so that we can surface something
			// sensible through the JS API.
			return fmt.Errorf("change to limits violates consumers: %s", strings.Join(errorConsumers, ", "))
		}
	}

	jsa.mu.RLock()
	if jsa.subjectsOverlap(cfg.Subjects, mset) {
		jsa.mu.RUnlock()
		return NewJSStreamSubjectOverlapError()
	}
	jsa.mu.RUnlock()

	mset.mu.Lock()
	if mset.isLeader() {
		// Now check for subject interest differences.
		current := make(map[string]struct{}, len(ocfg.Subjects))
		for _, s := range ocfg.Subjects {
			current[s] = struct{}{}
		}
		// Update config with new values. The store update will enforce any stricter limits.

		// Now walk new subjects. All of these need to be added, but we will check
		// the originals first, since if one is in there we can skip it, already added.
		for _, s := range cfg.Subjects {
			if _, ok := current[s]; !ok {
				if _, err := mset.subscribeInternal(s, mset.processInboundJetStreamMsg); err != nil {
					mset.mu.Unlock()
					return err
				}
			}
			delete(current, s)
		}
		// What is left in current needs to be deleted.
		for s := range current {
			if err := mset.unsubscribeInternal(s); err != nil {
				mset.mu.Unlock()
				return err
			}
		}

		// Check for the Duplicates.
		if cfg.Duplicates != ocfg.Duplicates && mset.ddtmr != nil {
			// Let it fire right away, it will adjust properly on purge.
			mset.ddtmr.Reset(time.Microsecond)
		}
1783 		if len(cfg.Sources) > 0 || len(ocfg.Sources) > 0 {
1784 			currentIName := make(map[string]struct{})
1785 			needsStartingSeqNum := make(map[string]struct{})
1786 
1787 			for _, s := range ocfg.Sources {
1788 				currentIName[s.iname] = struct{}{}
1789 			}
1790 			for _, s := range cfg.Sources {
1791 				s.setIndexName()
1792 				if _, ok := currentIName[s.iname]; !ok {
1793 					// This is a new source.
1794 					if mset.sources == nil {
1795 						mset.sources = make(map[string]*sourceInfo)
1796 					}
1797 					mset.cfg.Sources = append(mset.cfg.Sources, s)
1798 
1799 					var si *sourceInfo
1800 
1801 					if len(s.SubjectTransforms) == 0 {
1802 						si = &sourceInfo{name: s.Name, iname: s.iname, sf: s.FilterSubject}
1803 					} else {
1804 						si = &sourceInfo{name: s.Name, iname: s.iname}
1805 						si.trs = make([]*subjectTransform, len(s.SubjectTransforms))
1806 						si.sfs = make([]string, len(s.SubjectTransforms))
1807 						for i := range s.SubjectTransforms {
1808 							// The transforms were already validated in the config check, but guard anyway.
1809 							si.sfs[i] = s.SubjectTransforms[i].Source
1810 							var err error
1811 							si.trs[i], err = NewSubjectTransform(s.SubjectTransforms[i].Source, s.SubjectTransforms[i].Destination)
1812 							if err != nil {
1813 								mset.mu.Unlock()
1814 								return fmt.Errorf("unable to get subject transform for source: %w", err)
1815 							}
1816 						}
1817 					}
1818 
1819 					mset.sources[s.iname] = si
1820 					needsStartingSeqNum[s.iname] = struct{}{}
1821 				} else {
1822 					// The source already exists.
1823 					delete(currentIName, s.iname)
1824 				}
1825 			}
1826 			// What is left in currentIName needs to be deleted.
1827 			for iName := range currentIName {
1828 				mset.cancelSourceConsumer(iName)
1829 				delete(mset.sources, iName)
1830 			}
1831 			neededCopy := make(map[string]struct{}, len(needsStartingSeqNum))
1832 			for iName := range needsStartingSeqNum {
1833 				neededCopy[iName] = struct{}{}
1834 			}
1835 			mset.setStartingSequenceForSources(needsStartingSeqNum)
1836 			for iName := range neededCopy {
1837 				mset.setSourceConsumer(iName, mset.sources[iName].sseq+1, time.Time{})
1838 			}
1839 		}
1840 	}
1841 
1842 	// Check for a change in allow direct status.
1843 	// These will run on all members, so just update as appropriate here.
1844 	// We do make sure we are caught up under monitorStream() during initial startup.
1845 	if cfg.AllowDirect != ocfg.AllowDirect {
1846 		if cfg.AllowDirect {
1847 			mset.subscribeToDirect()
1848 		} else {
1849 			mset.unsubscribeToDirect()
1850 		}
1851 	}
1852 
1853 	// Check for changes to RePublish.
1854 	if cfg.RePublish != nil {
1855 		// Empty is the same as all.
1856 		if cfg.RePublish.Source == _EMPTY_ {
1857 			cfg.RePublish.Source = fwcs
1858 		}
1859 		if cfg.RePublish.Destination == _EMPTY_ {
1860 			cfg.RePublish.Destination = fwcs
1861 		}
1862 		tr, err := NewSubjectTransform(cfg.RePublish.Source, cfg.RePublish.Destination)
1863 		if err != nil {
1864 			mset.mu.Unlock()
1865 			return fmt.Errorf("stream configuration for republish from '%s' to '%s': %w", cfg.RePublish.Source, cfg.RePublish.Destination, err)
1866 		}
1867 		// Assign our transform for republishing.
1868 mset.tr = tr 1869 } else { 1870 mset.tr = nil 1871 } 1872 1873 // Check for changes to subject transform 1874 if ocfg.SubjectTransform == nil && cfg.SubjectTransform != nil { 1875 tr, err := NewSubjectTransform(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination) 1876 if err != nil { 1877 mset.mu.Unlock() 1878 return fmt.Errorf("stream configuration for subject transform from '%s' to '%s': %w", cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination, err) 1879 } 1880 mset.itr = tr 1881 } else if ocfg.SubjectTransform != nil && cfg.SubjectTransform != nil && 1882 (ocfg.SubjectTransform.Source != cfg.SubjectTransform.Source || ocfg.SubjectTransform.Destination != cfg.SubjectTransform.Destination) { 1883 tr, err := NewSubjectTransform(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination) 1884 if err != nil { 1885 mset.mu.Unlock() 1886 return fmt.Errorf("stream configuration for subject transform from '%s' to '%s': %w", cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination, err) 1887 } 1888 mset.itr = tr 1889 } else if ocfg.SubjectTransform != nil && cfg.SubjectTransform == nil { 1890 mset.itr = nil 1891 } 1892 1893 js := mset.js 1894 1895 if targetTier := tierName(cfg); mset.tier != targetTier { 1896 // In cases such as R1->R3, only one update is needed 1897 jsa.usageMu.RLock() 1898 _, ok := jsa.limits[targetTier] 1899 jsa.usageMu.RUnlock() 1900 if ok { 1901 // error never set 1902 _, reported, _ := mset.store.Utilization() 1903 jsa.updateUsage(mset.tier, mset.stype, -int64(reported)) 1904 jsa.updateUsage(targetTier, mset.stype, int64(reported)) 1905 mset.tier = targetTier 1906 } 1907 // else in case the new tier does not exist (say on move), keep the old tier around 1908 // a subsequent update to an existing tier will then move from existing past tier to existing new tier 1909 } 1910 1911 if mset.isLeader() && mset.sa != nil && ocfg.Retention != cfg.Retention && cfg.Retention == InterestPolicy { 1912 // Before we can update the retention policy for the consumer, we need 1913 // the replica count of all consumers to match the stream. 1914 for _, c := range mset.sa.consumers { 1915 if c.Config.Replicas > 0 && c.Config.Replicas != cfg.Replicas { 1916 mset.mu.Unlock() 1917 return fmt.Errorf("consumer %q replica count must be %d", c.Name, cfg.Replicas) 1918 } 1919 } 1920 } 1921 1922 // Now update config and store's version of our config. 1923 mset.cfg = *cfg 1924 1925 // If we're changing retention and haven't errored because of consumer 1926 // replicas by now, whip through and update the consumer retention. 1927 if ocfg.Retention != cfg.Retention && cfg.Retention == InterestPolicy { 1928 toUpdate := make([]*consumer, 0, len(mset.consumers)) 1929 for _, c := range mset.consumers { 1930 toUpdate = append(toUpdate, c) 1931 } 1932 mset.mu.Unlock() 1933 for _, c := range toUpdate { 1934 c.mu.Lock() 1935 c.retention = cfg.Retention 1936 c.mu.Unlock() 1937 if c.retention == InterestPolicy { 1938 // If we're switching to interest, force a check of the 1939 // interest of existing stream messages. 1940 c.checkStateForInterestStream() 1941 } 1942 } 1943 mset.mu.Lock() 1944 } 1945 1946 // If we are the leader never suppress update advisory, simply send. 
1947 	if mset.isLeader() && sendAdvisory {
1948 		mset.sendUpdateAdvisoryLocked()
1949 	}
1950 	mset.mu.Unlock()
1951 
1952 	if js != nil {
1953 		maxBytesDiff := cfg.MaxBytes - ocfg.MaxBytes
1954 		if maxBytesDiff > 0 {
1955 			// Reserve the difference.
1956 			js.reserveStreamResources(&StreamConfig{
1957 				MaxBytes: maxBytesDiff,
1958 				Storage:  cfg.Storage,
1959 			})
1960 		} else if maxBytesDiff < 0 {
1961 			// Release the difference.
1962 			js.releaseStreamResources(&StreamConfig{
1963 				MaxBytes: -maxBytesDiff,
1964 				Storage:  ocfg.Storage,
1965 			})
1966 		}
1967 	}
1968 
1969 	mset.store.UpdateConfig(cfg)
1970 
1971 	return nil
1972 }
1973 
1974 // Purge will remove all messages from the stream and underlying store based on the request.
1975 func (mset *stream) purge(preq *JSApiStreamPurgeRequest) (purged uint64, err error) {
1976 	mset.mu.RLock()
1977 	if mset.closed.Load() {
1978 		mset.mu.RUnlock()
1979 		return 0, errStreamClosed
1980 	}
1981 	if mset.cfg.Sealed {
1982 		mset.mu.RUnlock()
1983 		return 0, errors.New("sealed stream")
1984 	}
1985 	store, mlseq := mset.store, mset.lseq
1986 	mset.mu.RUnlock()
1987 
1988 	if preq != nil {
1989 		purged, err = mset.store.PurgeEx(preq.Subject, preq.Sequence, preq.Keep)
1990 	} else {
1991 		purged, err = mset.store.Purge()
1992 	}
1993 	if err != nil {
1994 		return purged, err
1995 	}
1996 
1997 	// Grab our stream state.
1998 	var state StreamState
1999 	store.FastState(&state)
2000 	fseq, lseq := state.FirstSeq, state.LastSeq
2001 
2002 	// Check if our last sequence has moved past what it originally was; if so, reset.
2003 	if lseq > mlseq {
2004 		mset.setLastSeq(lseq)
2005 	}
2006 
2007 	// Purge consumers.
2008 	// Check for filtered purge.
2009 	if preq != nil && preq.Subject != _EMPTY_ {
2010 		ss := store.FilteredState(fseq, preq.Subject)
2011 		fseq = ss.First
2012 	}
2013 
2014 	mset.clsMu.RLock()
2015 	for _, o := range mset.cList {
2016 		start := fseq
2017 		o.mu.RLock()
2018 		// We update consumer sequences if no subject was specified,
2019 		// in which case we can purge all consumer sequences,
2020 		doPurge := preq == nil ||
2021 			preq.Subject == _EMPTY_ ||
2022 			// or if the consumer filter subject is equal to the purged subject,
2023 			// or the consumer filter subject is a subset of the purged subject,
2024 			// but not the other way around.
2025 			o.isEqualOrSubsetMatch(preq.Subject)
2026 		// Check if a consumer has a wider subject space than what we purged.
2027 		var isWider bool
2028 		if !doPurge && preq != nil && o.isFilteredMatch(preq.Subject) {
2029 			doPurge, isWider = true, true
2030 			start = state.FirstSeq
2031 		}
2032 		o.mu.RUnlock()
2033 		if doPurge {
2034 			o.purge(start, lseq, isWider)
2035 		}
2036 	}
2037 	mset.clsMu.RUnlock()
2038 
2039 	return purged, nil
2040 }
2041 
2042 // RemoveMsg will remove a message from a stream.
2043 // FIXME(dlc) - Should pick one of removeMsg or deleteMsg and be consistent.
2044 func (mset *stream) removeMsg(seq uint64) (bool, error) {
2045 	return mset.deleteMsg(seq)
2046 }
2047 
2048 // DeleteMsg will remove a message from a stream.
2049 func (mset *stream) deleteMsg(seq uint64) (bool, error) {
2050 	if mset.closed.Load() {
2051 		return false, errStreamClosed
2052 	}
2053 	return mset.store.RemoveMsg(seq)
2054 }
2055 
2056 // EraseMsg will securely remove a message and rewrite the data with random data.
2057 func (mset *stream) eraseMsg(seq uint64) (bool, error) {
2058 	if mset.closed.Load() {
2059 		return false, errStreamClosed
2060 	}
2061 	return mset.store.EraseMsg(seq)
2062 }
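// The doPurge/isWider classification in purge above is easiest to see with
// concrete subjects. A rough, self-contained sketch (examplePurgeDecision is
// illustrative only; it assumes the subjectIsSubsetMatch helper from the
// sublist code, and the real checks live on the consumer):
func examplePurgeDecision(filter, purged string) (doPurge, isWider bool) {
	switch {
	case purged == _EMPTY_:
		// No filtered purge, all consumer sequences can be updated.
		return true, false
	case subjectIsSubsetMatch(filter, purged):
		// Consumer filter is equal to or a subset of the purged subject.
		return true, false
	case subjectIsSubsetMatch(purged, filter):
		// Consumer filter is wider than the purged subject, so the
		// consumer restarts from the stream's first sequence.
		return true, true
	}
	return false, false
}

// For example, purging "orders.eu.*" would purge a consumer filtered on
// "orders.eu.new" (a subset), treat a consumer on "orders.>" as wider, and
// leave a consumer on "billing.>" untouched.

2063 
2064 // Are we a mirror?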
2065 func (mset *stream) isMirror() bool { 2066 mset.mu.RLock() 2067 defer mset.mu.RUnlock() 2068 return mset.cfg.Mirror != nil 2069 } 2070 2071 func (mset *stream) sourcesInfo() (sis []*StreamSourceInfo) { 2072 mset.mu.RLock() 2073 defer mset.mu.RUnlock() 2074 for _, si := range mset.sources { 2075 sis = append(sis, mset.sourceInfo(si)) 2076 } 2077 return sis 2078 } 2079 2080 // Lock should be held 2081 func (mset *stream) sourceInfo(si *sourceInfo) *StreamSourceInfo { 2082 if si == nil { 2083 return nil 2084 } 2085 2086 var ssi = StreamSourceInfo{Name: si.name, Lag: si.lag, Error: si.err, FilterSubject: si.sf} 2087 2088 trConfigs := make([]SubjectTransformConfig, len(si.sfs)) 2089 for i := range si.sfs { 2090 destination := _EMPTY_ 2091 if si.trs[i] != nil { 2092 destination = si.trs[i].dest 2093 } 2094 trConfigs[i] = SubjectTransformConfig{si.sfs[i], destination} 2095 } 2096 2097 ssi.SubjectTransforms = trConfigs 2098 2099 // If we have not heard from the source, set Active to -1. 2100 if si.last.IsZero() { 2101 ssi.Active = -1 2102 } else { 2103 ssi.Active = time.Since(si.last) 2104 } 2105 2106 var ext *ExternalStream 2107 if mset.cfg.Mirror != nil { 2108 ext = mset.cfg.Mirror.External 2109 } else if ss := mset.streamSource(si.iname); ss != nil && ss.External != nil { 2110 ext = ss.External 2111 } 2112 if ext != nil { 2113 ssi.External = &ExternalStream{ 2114 ApiPrefix: ext.ApiPrefix, 2115 DeliverPrefix: ext.DeliverPrefix, 2116 } 2117 } 2118 return &ssi 2119 } 2120 2121 // Return our source info for our mirror. 2122 func (mset *stream) mirrorInfo() *StreamSourceInfo { 2123 mset.mu.RLock() 2124 defer mset.mu.RUnlock() 2125 return mset.sourceInfo(mset.mirror) 2126 } 2127 2128 const sourceHealthCheckInterval = 1 * time.Second 2129 2130 // Will run as a Go routine to process mirror consumer messages. 2131 func (mset *stream) processMirrorMsgs(mirror *sourceInfo, ready *sync.WaitGroup) { 2132 s := mset.srv 2133 defer func() { 2134 mirror.wg.Done() 2135 s.grWG.Done() 2136 }() 2137 2138 // Grab stream quit channel. 2139 mset.mu.Lock() 2140 msgs, qch, siqch := mirror.msgs, mset.qch, mirror.qch 2141 // Set the last seen as now so that we don't fail at the first check. 2142 mirror.last = time.Now() 2143 mset.mu.Unlock() 2144 2145 // Signal the caller that we have captured the above fields. 2146 ready.Done() 2147 2148 // Make sure we have valid ipq for msgs. 2149 if msgs == nil { 2150 mset.mu.Lock() 2151 mset.cancelMirrorConsumer() 2152 mset.mu.Unlock() 2153 return 2154 } 2155 2156 t := time.NewTicker(sourceHealthCheckInterval) 2157 defer t.Stop() 2158 2159 for { 2160 select { 2161 case <-s.quitCh: 2162 return 2163 case <-qch: 2164 return 2165 case <-siqch: 2166 return 2167 case <-msgs.ch: 2168 ims := msgs.pop() 2169 for _, im := range ims { 2170 if !mset.processInboundMirrorMsg(im) { 2171 break 2172 } 2173 } 2174 msgs.recycle(&ims) 2175 case <-t.C: 2176 mset.mu.RLock() 2177 isLeader := mset.isLeader() 2178 stalled := mset.mirror != nil && time.Since(mset.mirror.last) > 3*sourceHealthCheckInterval 2179 mset.mu.RUnlock() 2180 // No longer leader. 2181 if !isLeader { 2182 mset.mu.Lock() 2183 mset.cancelMirrorConsumer() 2184 mset.mu.Unlock() 2185 return 2186 } 2187 // We are stalled. 2188 if stalled { 2189 mset.retryMirrorConsumer() 2190 } 2191 } 2192 } 2193 } 2194 2195 // Checks that the message is from our current direct consumer. We can not depend on sub comparison 2196 // since cross account imports break. 
2197 func (si *sourceInfo) isCurrentSub(reply string) bool {
2198 	return si.cname != _EMPTY_ && strings.HasPrefix(reply, jsAckPre) && si.cname == tokenAt(reply, 4)
2199 }
2200 
2201 // processInboundMirrorMsg handles processing messages bound for a stream.
2202 func (mset *stream) processInboundMirrorMsg(m *inMsg) bool {
2203 	mset.mu.Lock()
2204 	if mset.mirror == nil {
2205 		mset.mu.Unlock()
2206 		return false
2207 	}
2208 	if !mset.isLeader() {
2209 		mset.cancelMirrorConsumer()
2210 		mset.mu.Unlock()
2211 		return false
2212 	}
2213 
2214 	isControl := m.isControlMsg()
2215 
2216 	// Ignore from old subscriptions.
2217 	// The reason we cannot just compare subs is that on cross account imports they will not match.
2218 	if !mset.mirror.isCurrentSub(m.rply) && !isControl {
2219 		mset.mu.Unlock()
2220 		return false
2221 	}
2222 
2223 	mset.mirror.last = time.Now()
2224 	node := mset.node
2225 
2226 	// Check for heartbeats and flow control messages.
2227 	if isControl {
2228 		var needsRetry bool
2229 		// Flow controls have reply subjects.
2230 		if m.rply != _EMPTY_ {
2231 			mset.handleFlowControl(m)
2232 		} else {
2233 			// For idle heartbeats make sure we did not miss anything and check if we are considered stalled.
2234 			if ldseq := parseInt64(getHeader(JSLastConsumerSeq, m.hdr)); ldseq > 0 && uint64(ldseq) != mset.mirror.dseq {
2235 				needsRetry = true
2236 			} else if fcReply := getHeader(JSConsumerStalled, m.hdr); len(fcReply) > 0 {
2237 				// Other side thinks we are stalled, so send flow control reply.
2238 				mset.outq.sendMsg(string(fcReply), nil)
2239 			}
2240 		}
2241 		mset.mu.Unlock()
2242 		if needsRetry {
2243 			mset.retryMirrorConsumer()
2244 		}
2245 		return !needsRetry
2246 	}
2247 	// The reply is an ack subject: "$JS.ACK.<stream>.<consumer>.<delivered>.<stream seq>.<consumer seq>.<ts>.<pending>".
2248 	sseq, dseq, dc, ts, pending := replyInfo(m.rply)
2249 
2250 	if dc > 1 {
2251 		mset.mu.Unlock()
2252 		return false
2253 	}
2254 
2255 	// Mirror info tracking.
2256 	olag, osseq, odseq := mset.mirror.lag, mset.mirror.sseq, mset.mirror.dseq
2257 	if sseq == mset.mirror.sseq+1 {
2258 		mset.mirror.dseq = dseq
2259 		mset.mirror.sseq++
2260 	} else if sseq <= mset.mirror.sseq {
2261 		// Ignore older messages.
2262 		mset.mu.Unlock()
2263 		return true
2264 	} else if mset.mirror.cname == _EMPTY_ {
2265 		mset.mirror.cname = tokenAt(m.rply, 4)
2266 		mset.mirror.dseq, mset.mirror.sseq = dseq, sseq
2267 	} else {
2268 		// If the deliver sequence matches then the upstream stream has expired or deleted messages.
2269 		if dseq == mset.mirror.dseq+1 {
2270 			mset.skipMsgs(mset.mirror.sseq+1, sseq-1)
2271 			mset.mirror.dseq++
2272 			mset.mirror.sseq = sseq
2273 		} else {
2274 			mset.mu.Unlock()
2275 			mset.retryMirrorConsumer()
2276 			return false
2277 		}
2278 	}
2279 
2280 	if pending == 0 {
2281 		mset.mirror.lag = 0
2282 	} else {
2283 		mset.mirror.lag = pending - 1
2284 	}
2285 
2286 	// Check if we allow mirror direct here. If so, check that we have mostly caught up.
2287 	// The reason we do not require 0 is that if the source is active we may always be slightly behind.
2288 	if mset.cfg.MirrorDirect && mset.mirror.dsub == nil && pending < dgetCaughtUpThresh {
2289 		if err := mset.subscribeToMirrorDirect(); err != nil {
2290 			// Disable since we had problems above.
2291 			mset.cfg.MirrorDirect = false
2292 		}
2293 	}
2294 
2295 	// Do the subject transform if there's one.
2296 
2297 	for _, tr := range mset.mirror.trs {
2298 		if tr == nil {
2299 			continue
2300 		} else {
2301 			tsubj, err := tr.Match(m.subj)
2302 			if err == nil {
2303 				m.subj = tsubj
2304 				break
2305 			}
2306 		}
2307 	}
2308 
2309 	s, js, stype := mset.srv, mset.js, mset.cfg.Storage
2310 	mset.mu.Unlock()
2311 
2312 	var err error
2313 	if node != nil {
2314 		if js.limitsExceeded(stype) {
2315 			s.resourcesExceededError()
2316 			err = ApiErrors[JSInsufficientResourcesErr]
2317 		} else {
2318 			err = node.Propose(encodeStreamMsg(m.subj, _EMPTY_, m.hdr, m.msg, sseq-1, ts))
2319 		}
2320 	} else {
2321 		err = mset.processJetStreamMsg(m.subj, _EMPTY_, m.hdr, m.msg, sseq-1, ts, nil)
2322 	}
2323 	if err != nil {
2324 		if strings.Contains(err.Error(), "no space left") {
2325 			s.Errorf("JetStream out of space, will be DISABLED")
2326 			s.DisableJetStream()
2327 			return false
2328 		}
2329 		if err != errLastSeqMismatch {
2330 			mset.mu.RLock()
2331 			accName, sname := mset.acc.Name, mset.cfg.Name
2332 			mset.mu.RUnlock()
2333 			s.RateLimitWarnf("Error processing inbound mirror message for '%s' > '%s': %v",
2334 				accName, sname, err)
2335 		} else {
2336 			// We may have missed messages, restart.
2337 			if sseq <= mset.lastSeq() {
2338 				mset.mu.Lock()
2339 				mset.mirror.lag = olag
2340 				mset.mirror.sseq = osseq
2341 				mset.mirror.dseq = odseq
2342 				mset.mu.Unlock()
2343 				return false
2344 			} else {
2345 				mset.mu.Lock()
2346 				mset.mirror.dseq = odseq
2347 				mset.mirror.sseq = osseq
2348 				mset.mu.Unlock()
2349 				mset.retryMirrorConsumer()
2350 			}
2351 		}
2352 	}
2353 	return err == nil
2354 }
2355 
2356 func (mset *stream) setMirrorErr(err *ApiError) {
2357 	mset.mu.Lock()
2358 	if mset.mirror != nil {
2359 		mset.mirror.err = err
2360 	}
2361 	mset.mu.Unlock()
2362 }
2363 
2364 // Cancels a mirror consumer.
2365 //
2366 // Lock held on entry.
2367 func (mset *stream) cancelMirrorConsumer() {
2368 	if mset.mirror == nil {
2369 		return
2370 	}
2371 	mset.cancelSourceInfo(mset.mirror)
2372 }
2373 
2374 // Similar to setupMirrorConsumer except that it will print a debug statement
2375 // indicating that there is a retry.
2376 //
2377 // Lock is acquired in this function.
2378 func (mset *stream) retryMirrorConsumer() error {
2379 	mset.mu.Lock()
2380 	defer mset.mu.Unlock()
2381 	mset.srv.Debugf("Retrying mirror consumer for '%s > %s'", mset.acc.Name, mset.cfg.Name)
2382 	return mset.setupMirrorConsumer()
2383 }
2384 
2385 // Lock should be held.
2386 func (mset *stream) skipMsgs(start, end uint64) {
2387 	node, store := mset.node, mset.store
2388 	// If we are not clustered we can short circuit now with store.SkipMsgs.
2389 	if node == nil {
2390 		store.SkipMsgs(start, end-start+1)
2391 		mset.lseq = end
2392 		return
2393 	}
2394 
2395 	// FIXME (dlc) - We should allow proposals of DeleteRange, but would need to make sure all peers support it.
2396 	// With syncRequest it was easy to add a bool into the request.
2397 	var entries []*Entry
2398 	for seq := start; seq <= end; seq++ {
2399 		entries = append(entries, &Entry{EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0)})
2400 		// So a single proposal does not get too big.
2401 		if len(entries) > 10_000 {
2402 			node.ProposeDirect(entries)
2403 			// We need to re-create `entries` because there is a reference
2404 			// to it in the node's pae map.
2405 			entries = nil
2406 		}
2407 	}
2408 	// Send all at once.
2409 	if len(entries) > 0 {
2410 		node.ProposeDirect(entries)
2411 	}
2412 }
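// `entries = nil` (rather than `entries = entries[:0]`) matters above:
// ProposeDirect retains a reference to the slice in the node's pae map, so
// reusing the backing array would let the next batch overwrite entries the
// node still holds. A self-contained sketch of the aliasing pitfall
// (exampleSliceAliasing is illustrative only):
func exampleSliceAliasing() {
	batch := []int{1, 2, 3}
	retained := batch // e.g. a callee that holds on to the slice

	batch = batch[:0]         // reuse keeps the same backing array
	batch = append(batch, 99) // overwrites retained[0]
	fmt.Println(retained[0])  // prints 99, not 1

	batch = nil              // re-create instead
	batch = append(batch, 7) // fresh backing array, retained is safe now
	fmt.Println(retained[0]) // still 99 from the earlier overwrite
}

2413 
2414 const (
2415 	// Base retry backoff duration.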
2416 	retryBackOff = 5 * time.Second
2417 	// Maximum amount we will wait.
2418 	retryMaximum = 2 * time.Minute
2419 )
2420 
2421 // Calculate our backoff based on number of failures, e.g. 10s after one failure, 20s after two, capped at retryMaximum.
2422 func calculateRetryBackoff(fails int) time.Duration {
2423 	backoff := retryBackOff * time.Duration(fails*2)
2424 	if backoff > retryMaximum {
2425 		backoff = retryMaximum
2426 	}
2427 	return backoff
2428 }
2429 
2430 // This will schedule a call to setupMirrorConsumer, taking into account the last
2431 // time it was retried, and determine the soonest setupMirrorConsumer can be called
2432 // without tripping the sourceConsumerRetryThreshold. We will also take into account
2433 // the number of failures and back off our retries accordingly.
2434 // The mset.mirror pointer has been verified to be not nil by the caller.
2435 //
2436 // Lock held on entry.
2437 func (mset *stream) scheduleSetupMirrorConsumerRetry() {
2438 	// We are trying to figure out how soon we can retry. setupMirrorConsumer will reject
2439 	// a retry if the last one was done less than "sourceConsumerRetryThreshold" ago.
2440 	next := sourceConsumerRetryThreshold - time.Since(mset.mirror.lreq)
2441 	if next < 0 {
2442 		// It means that we have passed the threshold and so we are ready to go.
2443 		next = 0
2444 	}
2445 	// Take into account failures here.
2446 	next += calculateRetryBackoff(mset.mirror.fails)
2447 
2448 	// Add some jitter.
2449 	next += time.Duration(rand.Intn(int(100*time.Millisecond))) + 100*time.Millisecond
2450 
2451 	time.AfterFunc(next, func() {
2452 		mset.mu.Lock()
2453 		mset.setupMirrorConsumer()
2454 		mset.mu.Unlock()
2455 	})
2456 }
2457 
2458 // Setup our mirror consumer.
2459 // Lock should be held.
2460 func (mset *stream) setupMirrorConsumer() error {
2461 	if mset.closed.Load() {
2462 		return errStreamClosed
2463 	}
2464 	if mset.outq == nil {
2465 		return errors.New("outq required")
2466 	}
2467 	// We used to prevent the update of a mirror configuration in cluster
2468 	// mode but not in standalone. This is now fixed. However, without
2469 	// rejecting the update, it could be that if the source stream was
2470 	// removed and then later the mirrored stream config changed to
2471 	// remove the mirror configuration, this function would panic when
2472 	// accessing mset.cfg.Mirror fields. Adding this protection in case
2473 	// we allow the mirror config to be changed (removed) in the future.
2474 	if mset.cfg.Mirror == nil {
2475 		return errors.New("invalid mirror configuration")
2476 	}
2477 
2478 	// If this is the first time setting up this mirror.
2479 	if mset.mirror == nil {
2480 		mset.mirror = &sourceInfo{name: mset.cfg.Mirror.Name}
2481 	} else {
2482 		mset.cancelSourceInfo(mset.mirror)
2483 		mset.mirror.sseq = mset.lseq
2484 
2485 		// If we are no longer the leader stop trying.
2486 		if !mset.isLeader() {
2487 			return nil
2488 		}
2489 	}
2490 	mirror := mset.mirror
2491 
2492 	// We want to throttle here in terms of how fast we request new consumers,
2493 	// or if the previous is still in progress.
2494 	if last := time.Since(mirror.lreq); last < sourceConsumerRetryThreshold || mirror.sip {
2495 		mset.scheduleSetupMirrorConsumerRetry()
2496 		return nil
2497 	}
2498 	mirror.lreq = time.Now()
2499 
2500 	// Determine subjects etc.
2501 	var deliverSubject string
2502 	ext := mset.cfg.Mirror.External
2503 
2504 	if ext != nil && ext.DeliverPrefix != _EMPTY_ {
2505 		deliverSubject = strings.ReplaceAll(ext.DeliverPrefix+syncSubject(".M"), "..", ".")
2506 	} else {
2507 		deliverSubject = syncSubject("$JS.M")
2508 	}
2509 
2510 	// Now send off request to create/update our consumer. This will be all API based even in single server mode.
2511 	// We calculate durable names a priori so we do not need to save them off.
2512 
2513 	var state StreamState
2514 	mset.store.FastState(&state)
2515 
2516 	req := &CreateConsumerRequest{
2517 		Stream: mset.cfg.Mirror.Name,
2518 		Config: ConsumerConfig{
2519 			DeliverSubject: deliverSubject,
2520 			DeliverPolicy:  DeliverByStartSequence,
2521 			OptStartSeq:    state.LastSeq + 1,
2522 			AckPolicy:      AckNone,
2523 			AckWait:        22 * time.Hour,
2524 			MaxDeliver:     1,
2525 			Heartbeat:      sourceHealthCheckInterval,
2526 			FlowControl:    true,
2527 			Direct:         true,
2528 		},
2529 	}
2530 
2531 	// Only use the start optionals the first time.
2532 	if state.Msgs == 0 && state.FirstSeq == 0 {
2533 		req.Config.OptStartSeq = 0
2534 		if mset.cfg.Mirror.OptStartSeq > 0 {
2535 			req.Config.OptStartSeq = mset.cfg.Mirror.OptStartSeq
2536 		} else if mset.cfg.Mirror.OptStartTime != nil {
2537 			req.Config.OptStartTime = mset.cfg.Mirror.OptStartTime
2538 			req.Config.DeliverPolicy = DeliverByStartTime
2539 		}
2540 	}
2541 	if req.Config.OptStartSeq == 0 && req.Config.OptStartTime == nil {
2542 		// If starting out and lastSeq is 0.
2543 		req.Config.DeliverPolicy = DeliverAll
2544 	}
2545 
2546 	// Filters.
2547 	if mset.cfg.Mirror.FilterSubject != _EMPTY_ {
2548 		req.Config.FilterSubject = mset.cfg.Mirror.FilterSubject
2549 		mirror.sf = mset.cfg.Mirror.FilterSubject
2550 	}
2551 
2552 	if lst := len(mset.cfg.Mirror.SubjectTransforms); lst > 0 {
2553 		sfs := make([]string, lst)
2554 		trs := make([]*subjectTransform, lst)
2555 
2556 		for i, tr := range mset.cfg.Mirror.SubjectTransforms {
2557 			// Will not fail here since the config check already validated that the transform works.
2558 			subjectTransform, err := NewSubjectTransform(tr.Source, tr.Destination)
2559 			if err != nil {
2560 				mset.srv.Errorf("Unable to get transform for mirror consumer: %v", err)
2561 			}
2562 			sfs[i] = tr.Source
2563 			trs[i] = subjectTransform
2564 		}
2565 		mirror.sfs = sfs
2566 		mirror.trs = trs
2567 		req.Config.FilterSubjects = sfs
2568 	}
2569 
2570 	respCh := make(chan *JSApiConsumerCreateResponse, 1)
2571 	reply := infoReplySubject()
2572 	crSub, err := mset.subscribeInternal(reply, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
2573 		mset.unsubscribe(sub)
2574 		_, msg := c.msgParts(rmsg)
2575 
2576 		var ccr JSApiConsumerCreateResponse
2577 		if err := json.Unmarshal(msg, &ccr); err != nil {
2578 			c.Warnf("JetStream bad mirror consumer create response: %q", msg)
2579 			mset.setMirrorErr(ApiErrors[JSInvalidJSONErr])
2580 			return
2581 		}
2582 		select {
2583 		case respCh <- &ccr:
2584 		default:
2585 		}
2586 	})
2587 	if err != nil {
2588 		mirror.err = NewJSMirrorConsumerSetupFailedError(err, Unless(err))
2589 		mset.scheduleSetupMirrorConsumerRetry()
2590 		return nil
2591 	}
2592 
2593 	b, _ := json.Marshal(req)
2594 
2595 	var subject string
2596 	if req.Config.FilterSubject != _EMPTY_ {
2597 		req.Config.Name = fmt.Sprintf("mirror-%s", createConsumerName())
2598 		subject = fmt.Sprintf(JSApiConsumerCreateExT, mset.cfg.Mirror.Name, req.Config.Name, req.Config.FilterSubject)
2599 	} else {
2600 		subject = fmt.Sprintf(JSApiConsumerCreateT, mset.cfg.Mirror.Name)
2601 	}
2602 	if ext != nil {
2603 		subject = strings.Replace(subject, JSApiPrefix, ext.ApiPrefix, 1)
2604 		subject = strings.ReplaceAll(subject, "..", ".")
2605 	}
2606 
2607 	// Reset.
2608 	mirror.msgs = nil
2609 	mirror.err = nil
2610 	mirror.sip = true
2611 
2612 	// Send the consumer create request.
2613 	mset.outq.send(newJSPubMsg(subject, _EMPTY_, reply, nil, b, nil, 0))
2614 
2615 	go func() {
2616 
2617 		var retry bool
2618 		defer
func() { 2619 mset.mu.Lock() 2620 // Check that this is still valid and if so, clear the "setup in progress" flag. 2621 if mset.mirror != nil { 2622 mset.mirror.sip = false 2623 // If we need to retry, schedule now 2624 if retry { 2625 mset.mirror.fails++ 2626 // Cancel here since we can not do anything with this consumer at this point. 2627 mset.cancelSourceInfo(mset.mirror) 2628 mset.scheduleSetupMirrorConsumerRetry() 2629 } else { 2630 // Clear on success. 2631 mset.mirror.fails = 0 2632 } 2633 } 2634 mset.mu.Unlock() 2635 }() 2636 2637 // Wait for previous processMirrorMsgs go routine to be completely done. 2638 // If none is running, this will not block. 2639 mirror.wg.Wait() 2640 2641 select { 2642 case ccr := <-respCh: 2643 mset.mu.Lock() 2644 // Mirror config has been removed. 2645 if mset.mirror == nil { 2646 mset.mu.Unlock() 2647 return 2648 } 2649 ready := sync.WaitGroup{} 2650 mirror := mset.mirror 2651 mirror.err = nil 2652 if ccr.Error != nil || ccr.ConsumerInfo == nil { 2653 mset.srv.Warnf("JetStream error response for create mirror consumer: %+v", ccr.Error) 2654 mirror.err = ccr.Error 2655 // Let's retry as soon as possible, but we are gated by sourceConsumerRetryThreshold 2656 retry = true 2657 mset.mu.Unlock() 2658 return 2659 } else { 2660 // Setup actual subscription to process messages from our source. 2661 qname := fmt.Sprintf("[ACC:%s] stream mirror '%s' of '%s' msgs", mset.acc.Name, mset.cfg.Name, mset.cfg.Mirror.Name) 2662 // Create a new queue each time 2663 mirror.msgs = newIPQueue[*inMsg](mset.srv, qname) 2664 msgs := mirror.msgs 2665 sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { 2666 hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy. 2667 mset.queueInbound(msgs, subject, reply, hdr, msg, nil) 2668 }) 2669 if err != nil { 2670 mirror.err = NewJSMirrorConsumerSetupFailedError(err, Unless(err)) 2671 retry = true 2672 mset.mu.Unlock() 2673 return 2674 } 2675 // Save our sub. 2676 mirror.sub = sub 2677 2678 // When an upstream stream expires messages or in general has messages that we want 2679 // that are no longer available we need to adjust here. 2680 var state StreamState 2681 mset.store.FastState(&state) 2682 2683 // Check if we need to skip messages. 2684 if state.LastSeq != ccr.ConsumerInfo.Delivered.Stream { 2685 // Check to see if delivered is past our last and we have no msgs. This will help the 2686 // case when mirroring a stream that has a very high starting sequence number. 2687 if state.Msgs == 0 && ccr.ConsumerInfo.Delivered.Stream > state.LastSeq { 2688 mset.store.PurgeEx(_EMPTY_, ccr.ConsumerInfo.Delivered.Stream+1, 0) 2689 mset.lseq = ccr.ConsumerInfo.Delivered.Stream 2690 } else { 2691 mset.skipMsgs(state.LastSeq+1, ccr.ConsumerInfo.Delivered.Stream) 2692 } 2693 } 2694 2695 // Capture consumer name. 2696 mirror.cname = ccr.ConsumerInfo.Name 2697 mirror.dseq = 0 2698 mirror.sseq = ccr.ConsumerInfo.Delivered.Stream 2699 mirror.qch = make(chan struct{}) 2700 mirror.wg.Add(1) 2701 ready.Add(1) 2702 if !mset.srv.startGoRoutine( 2703 func() { mset.processMirrorMsgs(mirror, &ready) }, 2704 pprofLabels{ 2705 "type": "mirror", 2706 "account": mset.acc.Name, 2707 "stream": mset.cfg.Name, 2708 "consumer": mirror.cname, 2709 }, 2710 ) { 2711 ready.Done() 2712 } 2713 } 2714 mset.mu.Unlock() 2715 ready.Wait() 2716 case <-time.After(5 * time.Second): 2717 mset.unsubscribe(crSub) 2718 // We already waited 5 seconds, let's retry now. 
2719 			retry = true
2720 		}
2721 	}()
2722 
2723 	return nil
2724 }
2725 
2726 func (mset *stream) streamSource(iname string) *StreamSource {
2727 	for _, ssi := range mset.cfg.Sources {
2728 		if ssi.iname == iname {
2729 			return ssi
2730 		}
2731 	}
2732 	return nil
2733 }
2734 
2735 func (mset *stream) retrySourceConsumer(iName string) {
2736 	mset.mu.Lock()
2737 	defer mset.mu.Unlock()
2738 
2739 	si := mset.sources[iName]
2740 	if si == nil {
2741 		return
2742 	}
2743 	var ss = mset.streamSource(iName)
2744 	if ss != nil {
2745 		iNameMap := map[string]struct{}{
2746 			iName: {},
2747 		}
2748 		mset.setStartingSequenceForSources(iNameMap)
2749 		mset.retrySourceConsumerAtSeq(iName, si.sseq+1)
2750 	}
2751 }
2752 
2753 // Same as setSourceConsumer but simply issues a debug statement indicating
2754 // that there is a retry.
2755 //
2756 // Lock should be held.
2757 func (mset *stream) retrySourceConsumerAtSeq(iname string, seq uint64) {
2758 	s := mset.srv
2759 
2760 	s.Debugf("Retrying source consumer for '%s > %s'", mset.acc.Name, mset.cfg.Name)
2761 
2762 	// setSourceConsumer will check that the source is still configured.
2763 	mset.setSourceConsumer(iname, seq, time.Time{})
2764 }
2765 
2766 // Lock should be held.
2767 func (mset *stream) cancelSourceConsumer(iname string) {
2768 	if si := mset.sources[iname]; si != nil {
2769 		mset.cancelSourceInfo(si)
2770 		si.sseq, si.dseq = 0, 0
2771 	}
2772 }
2773 
2774 // The `si` has been verified to be not nil. The sourceInfo's sub will
2775 // be unsubscribed and set to nil (if not already done) and the
2776 // cname will be reset. The message processing go routine's quit channel
2777 // will be closed if still open.
2778 //
2779 // Lock should be held.
2780 func (mset *stream) cancelSourceInfo(si *sourceInfo) {
2781 	if si.sub != nil {
2782 		mset.unsubscribe(si.sub)
2783 		si.sub = nil
2784 	}
2785 	// In case we had a mirror direct subscription.
2786 	if si.dsub != nil {
2787 		mset.unsubscribe(si.dsub)
2788 		si.dsub = nil
2789 	}
2790 	mset.removeInternalConsumer(si)
2791 	if si.qch != nil {
2792 		close(si.qch)
2793 		si.qch = nil
2794 	}
2795 	if si.msgs != nil {
2796 		si.msgs.drain()
2797 		si.msgs.unregister()
2798 	}
2799 }
2800 
2801 const sourceConsumerRetryThreshold = 2 * time.Second
2802 
2803 // This will schedule a call to setSourceConsumer, taking into account the last
2804 // time it was retried, and determine the soonest setSourceConsumer can be called
2805 // without tripping the sourceConsumerRetryThreshold.
2806 //
2807 // Lock held on entry.
2808 func (mset *stream) scheduleSetSourceConsumerRetry(si *sourceInfo, seq uint64, startTime time.Time) {
2809 	// We are trying to figure out how soon we can retry. setSourceConsumer will reject
2810 	// a retry if the last one was done less than "sourceConsumerRetryThreshold" ago.
2811 	next := sourceConsumerRetryThreshold - time.Since(si.lreq)
2812 	if next < 0 {
2813 		// It means that we have passed the threshold and so we are ready to go.
2814 		next = 0
2815 	}
2816 	// Take into account failures here.
2817 	next += calculateRetryBackoff(si.fails)
2818 
2819 	// To make *sure* that the next request will not fail, add a bit of buffer
2820 	// and some randomness.
2821 	next += time.Duration(rand.Intn(int(10*time.Millisecond))) + 10*time.Millisecond
2822 	mset.scheduleSetSourceConsumer(si.iname, seq, next, startTime)
2823 }
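// The scheduling helper below relies on time.Timer.Stop semantics: Stop
// reports false when the timer has already fired, meaning its callback is
// running (or about to run), so re-arming the timer then could produce two
// concurrent callbacks. A minimal standalone sketch of the idiom
// (exampleRearmTimer is illustrative only, not part of the server):
func exampleRearmTimer(timers map[string]*time.Timer, key string, delay time.Duration, fn func()) {
	if t, ok := timers[key]; ok && !t.Stop() {
		// The timer already fired: let the in-flight callback finish;
		// it can schedule another timer itself if needed.
		return
	}
	timers[key] = time.AfterFunc(delay, fn)
}

2824 
2825 // Simply schedules setSourceConsumer at the given delay.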
2826 // 2827 // Lock held on entry 2828 func (mset *stream) scheduleSetSourceConsumer(iname string, seq uint64, delay time.Duration, startTime time.Time) { 2829 if mset.sourceRetries == nil { 2830 mset.sourceRetries = map[string]*time.Timer{} 2831 } 2832 if t, ok := mset.sourceRetries[iname]; ok && !t.Stop() { 2833 // It looks like the goroutine has started running but hasn't taken the 2834 // stream lock yet (otherwise the map entry would be deleted). We had 2835 // might as well let the running goroutine complete and schedule another 2836 // timer only if it needs to. 2837 return 2838 } 2839 mset.sourceRetries[iname] = time.AfterFunc(delay, func() { 2840 mset.mu.Lock() 2841 defer mset.mu.Unlock() 2842 2843 delete(mset.sourceRetries, iname) 2844 mset.setSourceConsumer(iname, seq, startTime) 2845 }) 2846 } 2847 2848 // Lock should be held. 2849 func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.Time) { 2850 // Ignore if closed. 2851 if mset.closed.Load() { 2852 return 2853 } 2854 2855 si := mset.sources[iname] 2856 if si == nil { 2857 return 2858 } 2859 // Cancel previous instance if applicable 2860 mset.cancelSourceInfo(si) 2861 2862 ssi := mset.streamSource(iname) 2863 if ssi == nil { 2864 return 2865 } 2866 2867 // We want to throttle here in terms of how fast we request new consumers, 2868 // or if the previous is still in progress. 2869 if last := time.Since(si.lreq); last < sourceConsumerRetryThreshold || si.sip { 2870 mset.scheduleSetSourceConsumerRetry(si, seq, startTime) 2871 return 2872 } 2873 si.lreq = time.Now() 2874 2875 // Determine subjects etc. 2876 var deliverSubject string 2877 ext := ssi.External 2878 2879 if ext != nil && ext.DeliverPrefix != _EMPTY_ { 2880 deliverSubject = strings.ReplaceAll(ext.DeliverPrefix+syncSubject(".S"), "..", ".") 2881 } else { 2882 deliverSubject = syncSubject("$JS.S") 2883 } 2884 2885 req := &CreateConsumerRequest{ 2886 Stream: si.name, 2887 Config: ConsumerConfig{ 2888 DeliverSubject: deliverSubject, 2889 AckPolicy: AckNone, 2890 AckWait: 22 * time.Hour, 2891 MaxDeliver: 1, 2892 Heartbeat: sourceHealthCheckInterval, 2893 FlowControl: true, 2894 Direct: true, 2895 }, 2896 } 2897 2898 // If starting, check any configs. 2899 if !startTime.IsZero() && seq > 1 { 2900 req.Config.OptStartTime = &startTime 2901 req.Config.DeliverPolicy = DeliverByStartTime 2902 } else if seq <= 1 { 2903 if ssi.OptStartSeq > 0 { 2904 req.Config.OptStartSeq = ssi.OptStartSeq 2905 req.Config.DeliverPolicy = DeliverByStartSequence 2906 } else if ssi.OptStartTime != nil { 2907 // Check to see if our configured start is before what we remember. 2908 // Applicable on restart similar to below. 2909 if ssi.OptStartTime.Before(si.start) { 2910 req.Config.OptStartTime = &si.start 2911 } else { 2912 req.Config.OptStartTime = ssi.OptStartTime 2913 } 2914 req.Config.DeliverPolicy = DeliverByStartTime 2915 } else if !si.start.IsZero() { 2916 // We are falling back to time based startup on a recover, but our messages are gone. e.g. purge, expired, retention policy. 
2917 req.Config.OptStartTime = &si.start 2918 req.Config.DeliverPolicy = DeliverByStartTime 2919 } 2920 } else { 2921 req.Config.OptStartSeq = seq 2922 req.Config.DeliverPolicy = DeliverByStartSequence 2923 } 2924 // Filters 2925 if ssi.FilterSubject != _EMPTY_ { 2926 req.Config.FilterSubject = ssi.FilterSubject 2927 } 2928 2929 var filterSubjects []string 2930 for _, tr := range ssi.SubjectTransforms { 2931 filterSubjects = append(filterSubjects, tr.Source) 2932 } 2933 req.Config.FilterSubjects = filterSubjects 2934 2935 respCh := make(chan *JSApiConsumerCreateResponse, 1) 2936 reply := infoReplySubject() 2937 crSub, err := mset.subscribeInternal(reply, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { 2938 mset.unsubscribe(sub) 2939 _, msg := c.msgParts(rmsg) 2940 var ccr JSApiConsumerCreateResponse 2941 if err := json.Unmarshal(msg, &ccr); err != nil { 2942 c.Warnf("JetStream bad source consumer create response: %q", msg) 2943 return 2944 } 2945 select { 2946 case respCh <- &ccr: 2947 default: 2948 } 2949 }) 2950 if err != nil { 2951 si.err = NewJSSourceConsumerSetupFailedError(err, Unless(err)) 2952 mset.scheduleSetSourceConsumerRetry(si, seq, startTime) 2953 return 2954 } 2955 2956 var subject string 2957 if req.Config.FilterSubject != _EMPTY_ { 2958 req.Config.Name = fmt.Sprintf("src-%s", createConsumerName()) 2959 subject = fmt.Sprintf(JSApiConsumerCreateExT, si.name, req.Config.Name, req.Config.FilterSubject) 2960 } else if len(req.Config.FilterSubjects) == 1 { 2961 req.Config.Name = fmt.Sprintf("src-%s", createConsumerName()) 2962 // It is necessary to switch to using FilterSubject here as the extended consumer 2963 // create API checks for it, so as to not accidentally allow multiple filtered subjects. 2964 req.Config.FilterSubject = req.Config.FilterSubjects[0] 2965 req.Config.FilterSubjects = nil 2966 subject = fmt.Sprintf(JSApiConsumerCreateExT, si.name, req.Config.Name, req.Config.FilterSubject) 2967 } else { 2968 subject = fmt.Sprintf(JSApiConsumerCreateT, si.name) 2969 } 2970 if ext != nil { 2971 subject = strings.Replace(subject, JSApiPrefix, ext.ApiPrefix, 1) 2972 subject = strings.ReplaceAll(subject, "..", ".") 2973 } 2974 2975 // Marshal request. 2976 b, _ := json.Marshal(req) 2977 2978 // Reset 2979 si.msgs = nil 2980 si.err = nil 2981 si.sip = true 2982 2983 // Send the consumer create request 2984 mset.outq.send(newJSPubMsg(subject, _EMPTY_, reply, nil, b, nil, 0)) 2985 2986 go func() { 2987 2988 var retry bool 2989 defer func() { 2990 mset.mu.Lock() 2991 // Check that this is still valid and if so, clear the "setup in progress" flag. 2992 if si := mset.sources[iname]; si != nil { 2993 si.sip = false 2994 // If we need to retry, schedule now 2995 if retry { 2996 si.fails++ 2997 // Cancel here since we can not do anything with this consumer at this point. 2998 mset.cancelSourceInfo(si) 2999 mset.scheduleSetSourceConsumerRetry(si, seq, startTime) 3000 } else { 3001 // Clear on success. 3002 si.fails = 0 3003 } 3004 } 3005 mset.mu.Unlock() 3006 }() 3007 3008 // Wait for previous processSourceMsgs go routine to be completely done. 3009 // If none is running, this will not block. 
3010 si.wg.Wait() 3011 3012 select { 3013 case ccr := <-respCh: 3014 ready := sync.WaitGroup{} 3015 mset.mu.Lock() 3016 // Check that it has not been removed or canceled (si.sub would be nil) 3017 if si := mset.sources[iname]; si != nil { 3018 si.err = nil 3019 if ccr.Error != nil || ccr.ConsumerInfo == nil { 3020 // Note: this warning can happen a few times when starting up the server when sourcing streams are 3021 // defined, this is normal as the streams are re-created in no particular order and it is possible 3022 // that a stream sourcing another could come up before all of its sources have been recreated. 3023 mset.srv.Warnf("JetStream error response for stream %s create source consumer %s: %+v", mset.cfg.Name, si.name, ccr.Error) 3024 si.err = ccr.Error 3025 // Let's retry as soon as possible, but we are gated by sourceConsumerRetryThreshold 3026 retry = true 3027 mset.mu.Unlock() 3028 return 3029 } else { 3030 // Setup actual subscription to process messages from our source. 3031 qname := fmt.Sprintf("[ACC:%s] stream source '%s' from '%s' msgs", mset.acc.Name, mset.cfg.Name, si.name) 3032 // Create a new queue each time 3033 si.msgs = newIPQueue[*inMsg](mset.srv, qname) 3034 msgs := si.msgs 3035 sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { 3036 hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy. 3037 mset.queueInbound(msgs, subject, reply, hdr, msg, nil) 3038 }) 3039 if err != nil { 3040 si.err = NewJSSourceConsumerSetupFailedError(err, Unless(err)) 3041 retry = true 3042 mset.mu.Unlock() 3043 return 3044 } 3045 // Save our sub. 3046 si.sub = sub 3047 3048 if si.sseq != ccr.ConsumerInfo.Delivered.Stream { 3049 si.sseq = ccr.ConsumerInfo.Delivered.Stream + 1 3050 } 3051 // Capture consumer name. 3052 si.cname = ccr.ConsumerInfo.Name 3053 // Do not set si.sseq to seq here. si.sseq will be set in processInboundSourceMsg 3054 si.dseq = 0 3055 si.qch = make(chan struct{}) 3056 si.wg.Add(1) 3057 ready.Add(1) 3058 if !mset.srv.startGoRoutine( 3059 func() { mset.processSourceMsgs(si, &ready) }, 3060 pprofLabels{ 3061 "type": "source", 3062 "account": mset.acc.Name, 3063 "stream": mset.cfg.Name, 3064 "consumer": si.cname, 3065 }, 3066 ) { 3067 ready.Done() 3068 } 3069 } 3070 } 3071 mset.mu.Unlock() 3072 ready.Wait() 3073 case <-time.After(5 * time.Second): 3074 mset.unsubscribe(crSub) 3075 // We already waited 5 seconds, let's retry now. 3076 retry = true 3077 } 3078 }() 3079 } 3080 3081 func (mset *stream) processSourceMsgs(si *sourceInfo, ready *sync.WaitGroup) { 3082 s := mset.srv 3083 defer func() { 3084 si.wg.Done() 3085 s.grWG.Done() 3086 }() 3087 3088 // Grab some stream and sourceInfo values now... 3089 mset.mu.Lock() 3090 msgs, qch, siqch, iname := si.msgs, mset.qch, si.qch, si.iname 3091 // Set the last seen as now so that we don't fail at the first check. 3092 si.last = time.Now() 3093 mset.mu.Unlock() 3094 3095 // Signal the caller that we have captured the above fields. 
3096 ready.Done() 3097 3098 t := time.NewTicker(sourceHealthCheckInterval) 3099 defer t.Stop() 3100 3101 for { 3102 select { 3103 case <-s.quitCh: 3104 return 3105 case <-qch: 3106 return 3107 case <-siqch: 3108 return 3109 case <-msgs.ch: 3110 ims := msgs.pop() 3111 for _, im := range ims { 3112 if !mset.processInboundSourceMsg(si, im) { 3113 break 3114 } 3115 } 3116 msgs.recycle(&ims) 3117 case <-t.C: 3118 mset.mu.RLock() 3119 isLeader := mset.isLeader() 3120 stalled := time.Since(si.last) > 3*sourceHealthCheckInterval 3121 mset.mu.RUnlock() 3122 // No longer leader. 3123 if !isLeader { 3124 mset.mu.Lock() 3125 mset.cancelSourceConsumer(iname) 3126 mset.mu.Unlock() 3127 return 3128 } 3129 // We are stalled. 3130 if stalled { 3131 mset.mu.Lock() 3132 // We don't need to schedule here, we are going to simply 3133 // call setSourceConsumer with the current state+1. 3134 mset.setSourceConsumer(iname, si.sseq+1, time.Time{}) 3135 mset.mu.Unlock() 3136 } 3137 } 3138 } 3139 } 3140 3141 // isControlMsg determines if this is a control message. 3142 func (m *inMsg) isControlMsg() bool { 3143 return len(m.msg) == 0 && len(m.hdr) > 0 && bytes.HasPrefix(m.hdr, []byte("NATS/1.0 100 ")) 3144 } 3145 3146 // Sends a reply to a flow control request. 3147 func (mset *stream) sendFlowControlReply(reply string) { 3148 mset.mu.RLock() 3149 if mset.isLeader() && mset.outq != nil { 3150 mset.outq.sendMsg(reply, nil) 3151 } 3152 mset.mu.RUnlock() 3153 } 3154 3155 // handleFlowControl will properly handle flow control messages for both R==1 and R>1. 3156 // Lock should be held. 3157 func (mset *stream) handleFlowControl(m *inMsg) { 3158 // If we are clustered we will send the flow control message through the replication stack. 3159 if mset.isClustered() { 3160 mset.node.Propose(encodeStreamMsg(_EMPTY_, m.rply, m.hdr, nil, 0, 0)) 3161 } else { 3162 mset.outq.sendMsg(m.rply, nil) 3163 } 3164 } 3165 3166 // processInboundSourceMsg handles processing other stream messages bound for this stream. 3167 func (mset *stream) processInboundSourceMsg(si *sourceInfo, m *inMsg) bool { 3168 mset.mu.Lock() 3169 3170 // If we are no longer the leader cancel this subscriber. 3171 if !mset.isLeader() { 3172 mset.cancelSourceConsumer(si.iname) 3173 mset.mu.Unlock() 3174 return false 3175 } 3176 3177 isControl := m.isControlMsg() 3178 3179 // Ignore from old subscriptions. 3180 if !si.isCurrentSub(m.rply) && !isControl { 3181 mset.mu.Unlock() 3182 return false 3183 } 3184 3185 si.last = time.Now() 3186 node := mset.node 3187 3188 // Check for heartbeats and flow control messages. 3189 if isControl { 3190 var needsRetry bool 3191 // Flow controls have reply subjects. 3192 if m.rply != _EMPTY_ { 3193 mset.handleFlowControl(m) 3194 } else { 3195 // For idle heartbeats make sure we did not miss anything. 3196 if ldseq := parseInt64(getHeader(JSLastConsumerSeq, m.hdr)); ldseq > 0 && uint64(ldseq) != si.dseq { 3197 needsRetry = true 3198 mset.retrySourceConsumerAtSeq(si.iname, si.sseq+1) 3199 } else if fcReply := getHeader(JSConsumerStalled, m.hdr); len(fcReply) > 0 { 3200 // Other side thinks we are stalled, so send flow control reply. 3201 mset.outq.sendMsg(string(fcReply), nil) 3202 } 3203 } 3204 mset.mu.Unlock() 3205 return !needsRetry 3206 } 3207 3208 sseq, dseq, dc, _, pending := replyInfo(m.rply) 3209 3210 if dc > 1 { 3211 mset.mu.Unlock() 3212 return false 3213 } 3214 3215 // Tracking is done here. 
3216 if dseq == si.dseq+1 { 3217 si.dseq++ 3218 si.sseq = sseq 3219 } else if dseq > si.dseq { 3220 if si.cname == _EMPTY_ { 3221 si.cname = tokenAt(m.rply, 4) 3222 si.dseq, si.sseq = dseq, sseq 3223 } else { 3224 mset.retrySourceConsumerAtSeq(si.iname, si.sseq+1) 3225 mset.mu.Unlock() 3226 return false 3227 } 3228 } else { 3229 mset.mu.Unlock() 3230 return false 3231 } 3232 3233 if pending == 0 { 3234 si.lag = 0 3235 } else { 3236 si.lag = pending - 1 3237 } 3238 mset.mu.Unlock() 3239 3240 hdr, msg := m.hdr, m.msg 3241 3242 // If we are daisy chained here make sure to remove the original one. 3243 if len(hdr) > 0 { 3244 hdr = removeHeaderIfPresent(hdr, JSStreamSource) 3245 3246 // Remove any Nats-Expected- headers as we don't want to validate them. 3247 hdr = removeHeaderIfPrefixPresent(hdr, "Nats-Expected-") 3248 } 3249 // Hold onto the origin reply which has all the metadata. 3250 hdr = genHeader(hdr, JSStreamSource, si.genSourceHeader(m.rply)) 3251 3252 // Do the subject transform for the source if there's one 3253 3254 for _, tr := range si.trs { 3255 if tr == nil { 3256 continue 3257 } else { 3258 tsubj, err := tr.Match(m.subj) 3259 if err == nil { 3260 m.subj = tsubj 3261 break 3262 } 3263 } 3264 } 3265 3266 var err error 3267 // If we are clustered we need to propose this message to the underlying raft group. 3268 if node != nil { 3269 err = mset.processClusteredInboundMsg(m.subj, _EMPTY_, hdr, msg, nil) 3270 } else { 3271 err = mset.processJetStreamMsg(m.subj, _EMPTY_, hdr, msg, 0, 0, nil) 3272 } 3273 3274 if err != nil { 3275 s := mset.srv 3276 if strings.Contains(err.Error(), "no space left") { 3277 s.Errorf("JetStream out of space, will be DISABLED") 3278 s.DisableJetStream() 3279 } else { 3280 mset.mu.RLock() 3281 accName, sname, iname := mset.acc.Name, mset.cfg.Name, si.iname 3282 mset.mu.RUnlock() 3283 // Log some warning for errors other than errLastSeqMismatch 3284 if err != errLastSeqMismatch { 3285 s.RateLimitWarnf("Error processing inbound source %q for '%s' > '%s': %v", 3286 iname, accName, sname, err) 3287 } 3288 // Retry in all type of errors. 3289 // This will make sure the source is still in mset.sources map, 3290 // find the last sequence and then call setSourceConsumer. 3291 mset.retrySourceConsumer(iname) 3292 } 3293 return false 3294 } 3295 3296 return true 3297 } 3298 3299 // Generate a new (2.10) style source header (stream name, sequence number, source filter, source destination transform). 3300 func (si *sourceInfo) genSourceHeader(reply string) string { 3301 var b strings.Builder 3302 iNameParts := strings.Split(si.iname, " ") 3303 3304 b.WriteString(iNameParts[0]) 3305 b.WriteByte(' ') 3306 // Grab sequence as text here from reply subject. 3307 var tsa [expectedNumReplyTokens]string 3308 start, tokens := 0, tsa[:0] 3309 for i := 0; i < len(reply); i++ { 3310 if reply[i] == btsep { 3311 tokens, start = append(tokens, reply[start:i]), i+1 3312 } 3313 } 3314 tokens = append(tokens, reply[start:]) 3315 seq := "1" // Default 3316 if len(tokens) == expectedNumReplyTokens && tokens[0] == "$JS" && tokens[1] == "ACK" { 3317 seq = tokens[5] 3318 } 3319 b.WriteString(seq) 3320 3321 b.WriteByte(' ') 3322 b.WriteString(iNameParts[1]) 3323 b.WriteByte(' ') 3324 b.WriteString(iNameParts[2]) 3325 return b.String() 3326 } 3327 3328 // Original version of header that stored ack reply direct. 
3329 func streamAndSeqFromAckReply(reply string) (string, string, uint64) { 3330 tsa := [expectedNumReplyTokens]string{} 3331 start, tokens := 0, tsa[:0] 3332 for i := 0; i < len(reply); i++ { 3333 if reply[i] == btsep { 3334 tokens, start = append(tokens, reply[start:i]), i+1 3335 } 3336 } 3337 tokens = append(tokens, reply[start:]) 3338 if len(tokens) != expectedNumReplyTokens || tokens[0] != "$JS" || tokens[1] != "ACK" { 3339 return _EMPTY_, _EMPTY_, 0 3340 } 3341 return tokens[2], _EMPTY_, uint64(parseAckReplyNum(tokens[5])) 3342 } 3343 3344 // Extract the stream name, the source index name and the message sequence number from the source header. 3345 // Uses the filter and transform arguments to provide backwards compatibility 3346 func streamAndSeq(shdr string) (string, string, uint64) { 3347 if strings.HasPrefix(shdr, jsAckPre) { 3348 return streamAndSeqFromAckReply(shdr) 3349 } 3350 // New version which is stream index name <SPC> sequence 3351 fields := strings.Split(shdr, " ") 3352 nFields := len(fields) 3353 3354 if nFields != 2 && nFields <= 3 { 3355 return _EMPTY_, _EMPTY_, 0 3356 } 3357 3358 if nFields >= 4 { 3359 return fields[0], strings.Join([]string{fields[0], fields[2], fields[3]}, " "), uint64(parseAckReplyNum(fields[1])) 3360 } else { 3361 return fields[0], _EMPTY_, uint64(parseAckReplyNum(fields[1])) 3362 } 3363 3364 } 3365 3366 // Lock should be held. 3367 func (mset *stream) setStartingSequenceForSources(iNames map[string]struct{}) { 3368 var state StreamState 3369 mset.store.FastState(&state) 3370 3371 // Do not reset sseq here so we can remember when purge/expiration happens. 3372 if state.Msgs == 0 { 3373 for iName := range iNames { 3374 si := mset.sources[iName] 3375 if si == nil { 3376 continue 3377 } else { 3378 si.dseq = 0 3379 } 3380 } 3381 return 3382 } 3383 3384 var smv StoreMsg 3385 for seq := state.LastSeq; seq >= state.FirstSeq; seq-- { 3386 sm, err := mset.store.LoadMsg(seq, &smv) 3387 if err != nil || len(sm.hdr) == 0 { 3388 continue 3389 } 3390 ss := getHeader(JSStreamSource, sm.hdr) 3391 if len(ss) == 0 { 3392 continue 3393 } 3394 streamName, indexName, sseq := streamAndSeq(string(ss)) 3395 3396 if _, ok := iNames[indexName]; ok { 3397 si := mset.sources[indexName] 3398 si.sseq = sseq 3399 si.dseq = 0 3400 delete(iNames, indexName) 3401 } else if indexName == _EMPTY_ && streamName != _EMPTY_ { 3402 for iName := range iNames { 3403 // TODO streamSource is a linear walk, to optimize later 3404 if si := mset.sources[iName]; si != nil && streamName == si.name || 3405 (mset.streamSource(iName).External != nil && streamName == si.name+":"+getHash(mset.streamSource(iName).External.ApiPrefix)) { 3406 si.sseq = sseq 3407 si.dseq = 0 3408 delete(iNames, iName) 3409 break 3410 } 3411 } 3412 } 3413 if len(iNames) == 0 { 3414 break 3415 } 3416 } 3417 } 3418 3419 // lock should be held. 
3420 // Resets the SourceInfo for all the sources.
3421 func (mset *stream) resetSourceInfo() {
3422 	mset.sources = make(map[string]*sourceInfo)
3423 
3424 	for _, ssi := range mset.cfg.Sources {
3425 		if ssi.iname == _EMPTY_ {
3426 			ssi.setIndexName()
3427 		}
3428 
3429 		var si *sourceInfo
3430 
3431 		if len(ssi.SubjectTransforms) == 0 {
3432 			si = &sourceInfo{name: ssi.Name, iname: ssi.iname, sf: ssi.FilterSubject}
3433 		} else {
3434 			sfs := make([]string, len(ssi.SubjectTransforms))
3435 			trs := make([]*subjectTransform, len(ssi.SubjectTransforms))
3436 			for i, str := range ssi.SubjectTransforms {
3437 				tr, err := NewSubjectTransform(str.Source, str.Destination)
3438 				if err != nil {
3439 					mset.srv.Errorf("Unable to get subject transform for source: %v", err)
3440 				}
3441 				sfs[i] = str.Source
3442 				trs[i] = tr
3443 			}
3444 			si = &sourceInfo{name: ssi.Name, iname: ssi.iname, sfs: sfs, trs: trs}
3445 		}
3446 		mset.sources[ssi.iname] = si
3447 	}
3448 }
3449 
3450 
3451 // This will do a reverse scan on startup or leader election
3452 // searching for the starting sequence number.
3453 // This can be slow in degenerate cases.
3454 // Lock should be held.
3455 func (mset *stream) startingSequenceForSources() {
3456 	if len(mset.cfg.Sources) == 0 {
3457 		return
3458 	}
3459 
3460 	// Always reset here.
3461 	mset.resetSourceInfo()
3462 
3463 	var state StreamState
3464 	mset.store.FastState(&state)
3465 
3466 	// If the last time has been stamped, remember it in case we need to fall back to it for any given upstream source.
3467 	// TODO(dlc) - This will be ok, but should formalize with new approach and more formal and durable state.
3468 	if !state.LastTime.IsZero() {
3469 		for _, si := range mset.sources {
3470 			si.start = state.LastTime
3471 		}
3472 	}
3473 	// Bail if no messages, meaning no context.
3474 	if state.Msgs == 0 {
3475 		return
3476 	}
3477 
3478 	// For short-circuiting the scan below.
3479 	expected := len(mset.cfg.Sources)
3480 	seqs := make(map[string]uint64)
3481 
3482 	// Stamp our si seq records on the way out.
3483 	defer func() {
3484 		for sname, seq := range seqs {
3485 			// Ignore if not set.
3486 			if seq == 0 {
3487 				continue
3488 			}
3489 			if si := mset.sources[sname]; si != nil {
3490 				si.sseq = seq
3491 				si.dseq = 0
3492 			}
3493 		}
3494 	}()
3495 
3496 	var smv StoreMsg
3497 	for seq := state.LastSeq; seq >= state.FirstSeq; seq-- {
3498 		sm, err := mset.store.LoadMsg(seq, &smv)
3499 		if err != nil || sm == nil || len(sm.hdr) == 0 {
3500 			continue
3501 		}
3502 		ss := getHeader(JSStreamSource, sm.hdr)
3503 		if len(ss) == 0 {
3504 			continue
3505 		}
3506 
3507 		var update = func(iName string, seq uint64) {
3508 			// Only update active sources in case we have older ones in here that got configured out.
3509 			if si := mset.sources[iName]; si != nil {
3510 				if _, ok := seqs[iName]; !ok {
3511 					seqs[iName] = seq
3512 				}
3513 			}
3514 		}
3515 
3516 		streamName, iName, sSeq := streamAndSeq(string(ss))
3517 		if iName == _EMPTY_ { // A pre-2.10 message header means it's a match for any source using that stream name.
3518 			for _, ssi := range mset.cfg.Sources {
3519 				if streamName == ssi.Name || (ssi.External != nil && streamName == ssi.Name+":"+getHash(ssi.External.ApiPrefix)) {
3520 					update(ssi.iname, sSeq)
3521 				}
3522 			}
3523 		} else {
3524 			update(iName, sSeq)
3525 		}
3526 		if len(seqs) == expected {
3527 			return
3528 		}
3529 	}
3530 }
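// The JSStreamSource header written by genSourceHeader is a space-separated
// "<stream> <sequence> <filter> <transform destination>" string, which
// streamAndSeq undoes (falling back to the pre-2.10 ack-reply form). A quick
// sketch of the round trip, with made-up values (exampleStreamAndSeq is
// illustrative only):
func exampleStreamAndSeq() {
	// 2.10+ style header: the filter and destination fields let us
	// reconstruct the source's index name.
	stream, iname, sseq := streamAndSeq("ORDERS 42 orders.eu.> deliveries.>")
	fmt.Printf("%q %q %d\n", stream, iname, sseq) // "ORDERS" "ORDERS orders.eu.> deliveries.>" 42

	// Pre-2.10 headers stored the raw ack reply instead, so only the
	// stream name and sequence can be recovered.
	stream, _, sseq = streamAndSeq("$JS.ACK.ORDERS.dur.1.42.7.1700000000000000000.0")
	fmt.Printf("%q %d\n", stream, sseq) // "ORDERS" 42
}

3531 
3532 // Setup our source consumers.
3533 // Lock should be held.
3534 func (mset *stream) setupSourceConsumers() error {
3535 	if mset.outq == nil {
3536 		return errors.New("outq required")
3537 	}
3538 	// Reset if needed.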
3539 for _, si := range mset.sources { 3540 if si.sub != nil { 3541 mset.cancelSourceConsumer(si.iname) 3542 } 3543 } 3544 3545 // If we are no longer the leader, give up 3546 if !mset.isLeader() { 3547 return nil 3548 } 3549 3550 mset.startingSequenceForSources() 3551 3552 // Setup our consumers at the proper starting position. 3553 for _, ssi := range mset.cfg.Sources { 3554 if si := mset.sources[ssi.iname]; si != nil { 3555 mset.setSourceConsumer(ssi.iname, si.sseq+1, time.Time{}) 3556 } 3557 } 3558 3559 return nil 3560 } 3561 3562 // Will create internal subscriptions for the stream. 3563 // Lock should be held. 3564 func (mset *stream) subscribeToStream() error { 3565 if mset.active { 3566 return nil 3567 } 3568 for _, subject := range mset.cfg.Subjects { 3569 if _, err := mset.subscribeInternal(subject, mset.processInboundJetStreamMsg); err != nil { 3570 return err 3571 } 3572 } 3573 // Check if we need to setup mirroring. 3574 if mset.cfg.Mirror != nil { 3575 // setup the initial mirror sourceInfo 3576 mset.mirror = &sourceInfo{name: mset.cfg.Mirror.Name} 3577 sfs := make([]string, len(mset.cfg.Mirror.SubjectTransforms)) 3578 trs := make([]*subjectTransform, len(mset.cfg.Mirror.SubjectTransforms)) 3579 3580 for i, tr := range mset.cfg.Mirror.SubjectTransforms { 3581 // will not fail as already checked before that the transform will work 3582 subjectTransform, err := NewSubjectTransform(tr.Source, tr.Destination) 3583 if err != nil { 3584 mset.srv.Errorf("Unable to get transform for mirror consumer: %v", err) 3585 } 3586 3587 sfs[i] = tr.Source 3588 trs[i] = subjectTransform 3589 } 3590 mset.mirror.sfs = sfs 3591 mset.mirror.trs = trs 3592 // delay the actual mirror consumer creation for after a delay 3593 mset.scheduleSetupMirrorConsumerRetry() 3594 } else if len(mset.cfg.Sources) > 0 { 3595 // Setup the initial source infos for the sources 3596 mset.resetSourceInfo() 3597 // Delay the actual source consumer(s) creation(s) for after a delay 3598 3599 mset.sourcesConsumerSetup = time.AfterFunc(time.Duration(rand.Intn(int(10*time.Millisecond)))+10*time.Millisecond, func() { 3600 mset.mu.Lock() 3601 mset.setupSourceConsumers() 3602 mset.mu.Unlock() 3603 }) 3604 } 3605 // Check for direct get access. 3606 // We spin up followers for clustered streams in monitorStream(). 3607 if mset.cfg.AllowDirect { 3608 if err := mset.subscribeToDirect(); err != nil { 3609 return err 3610 } 3611 } 3612 3613 mset.active = true 3614 return nil 3615 } 3616 3617 // Lock should be held. 3618 func (mset *stream) subscribeToDirect() error { 3619 // We will make this listen on a queue group by default, which can allow mirrors to participate on opt-in basis. 3620 if mset.directSub == nil { 3621 dsubj := fmt.Sprintf(JSDirectMsgGetT, mset.cfg.Name) 3622 if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetRequest); err == nil { 3623 mset.directSub = sub 3624 } else { 3625 return err 3626 } 3627 } 3628 // Now the one that will have subject appended past stream name. 3629 if mset.lastBySub == nil { 3630 dsubj := fmt.Sprintf(JSDirectGetLastBySubjectT, mset.cfg.Name, fwcs) 3631 // We will make this listen on a queue group by default, which can allow mirrors to participate on opt-in basis. 3632 if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetLastBySubjectRequest); err == nil { 3633 mset.lastBySub = sub 3634 } else { 3635 return err 3636 } 3637 } 3638 3639 return nil 3640 } 3641 3642 // Lock should be held. 
// Lock should be held.
func (mset *stream) unsubscribeToDirect() {
	if mset.directSub != nil {
		mset.unsubscribe(mset.directSub)
		mset.directSub = nil
	}
	if mset.lastBySub != nil {
		mset.unsubscribe(mset.lastBySub)
		mset.lastBySub = nil
	}
}

// Lock should be held.
func (mset *stream) subscribeToMirrorDirect() error {
	if mset.mirror == nil {
		return nil
	}

	// We will make these listen on a queue group by default, which can allow mirrors to participate on an opt-in basis.
	if mset.mirror.dsub == nil {
		dsubj := fmt.Sprintf(JSDirectMsgGetT, mset.mirror.name)
		if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetRequest); err == nil {
			mset.mirror.dsub = sub
		} else {
			return err
		}
	}
	// Now the one that will have the subject appended past the stream name.
	if mset.mirror.lbsub == nil {
		dsubj := fmt.Sprintf(JSDirectGetLastBySubjectT, mset.mirror.name, fwcs)
		if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetLastBySubjectRequest); err == nil {
			mset.mirror.lbsub = sub
		} else {
			return err
		}
	}

	return nil
}

// Stop our source consumers.
// Lock should be held.
func (mset *stream) stopSourceConsumers() {
	for _, si := range mset.sources {
		mset.cancelSourceInfo(si)
	}
}

// Lock should be held.
func (mset *stream) removeInternalConsumer(si *sourceInfo) {
	if si == nil || si.cname == _EMPTY_ {
		return
	}
	si.cname = _EMPTY_
}

// Will unsubscribe from the stream.
// Lock should be held.
func (mset *stream) unsubscribeToStream(stopping bool) error {
	for _, subject := range mset.cfg.Subjects {
		mset.unsubscribeInternal(subject)
	}
	if mset.mirror != nil {
		mset.cancelSourceInfo(mset.mirror)
		mset.mirror = nil
	}

	if len(mset.sources) > 0 {
		mset.stopSourceConsumers()
	}

	// In case we had direct get subscriptions.
	if stopping {
		mset.unsubscribeToDirect()
	}

	mset.active = false
	return nil
}

// Lock does NOT need to be held, we set the client on setup and never change it at this point.
func (mset *stream) subscribeInternal(subject string, cb msgHandler) (*subscription, error) {
	if mset.closed.Load() {
		return nil, errStreamClosed
	}
	if cb == nil {
		return nil, errInvalidMsgHandler
	}
	c := mset.client
	sid := int(mset.sid.Add(1))
	// Now create the subscription.
	return c.processSub([]byte(subject), nil, []byte(strconv.Itoa(sid)), cb, false)
}
// Lock does NOT need to be held, we set the client on setup and never change it at this point.
func (mset *stream) queueSubscribeInternal(subject, group string, cb msgHandler) (*subscription, error) {
	if mset.closed.Load() {
		return nil, errStreamClosed
	}
	if cb == nil {
		return nil, errInvalidMsgHandler
	}
	c := mset.client
	sid := int(mset.sid.Add(1))
	// Now create the subscription.
	return c.processSub([]byte(subject), []byte(group), []byte(strconv.Itoa(sid)), cb, false)
}

// This will unsubscribe us from the exact subject given.
// We do not currently track the subs so do not have the sid.
// This should be called only on an update.
// Lock does NOT need to be held, we set the client on setup and never change it at this point.
func (mset *stream) unsubscribeInternal(subject string) error {
	if mset.closed.Load() {
		return errStreamClosed
	}
	c := mset.client
	var sid []byte
	c.mu.Lock()
	for _, sub := range c.subs {
		if subject == string(sub.subject) {
			sid = sub.sid
			break
		}
	}
	c.mu.Unlock()

	if sid != nil {
		return c.processUnsub(sid)
	}
	return nil
}

// Lock should be held.
func (mset *stream) unsubscribe(sub *subscription) {
	if sub == nil || mset.closed.Load() {
		return
	}
	mset.client.processUnsub(sub.sid)
}

func (mset *stream) setupStore(fsCfg *FileStoreConfig) error {
	mset.mu.Lock()
	mset.created = time.Now().UTC()

	switch mset.cfg.Storage {
	case MemoryStorage:
		ms, err := newMemStore(&mset.cfg)
		if err != nil {
			mset.mu.Unlock()
			return err
		}
		mset.store = ms
	case FileStorage:
		s := mset.srv
		prf := s.jsKeyGen(s.getOpts().JetStreamKey, mset.acc.Name)
		if prf != nil {
			// We are encrypted here, fill in correct cipher selection.
			fsCfg.Cipher = s.getOpts().JetStreamCipher
		}
		oldprf := s.jsKeyGen(s.getOpts().JetStreamOldKey, mset.acc.Name)
		cfg := *fsCfg
		cfg.srv = s
		fs, err := newFileStoreWithCreated(cfg, mset.cfg, mset.created, prf, oldprf)
		if err != nil {
			mset.mu.Unlock()
			return err
		}
		mset.store = fs
	}
	// This will fire the callback but we do not require the lock since md will be 0 here.
	mset.store.RegisterStorageUpdates(mset.storeUpdates)
	mset.mu.Unlock()

	return nil
}
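// Illustrative note on the storage update callback registered above (values
// hypothetical): the store invokes it with message and byte deltas. After
// removing one 120-byte message at sequence 5 on subject "orders.1" it would
// fire roughly as:
//
//	mset.storeUpdates(-1, -120, 5, "orders.1")
//
// A single-message removal (md == -1) lets consumers decrement their pending
// counts cheaply; larger batch deltas force a full recalculation below.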
// Called for any updates to the underlying stream. We pass through the bytes to the
// jetstream account. We do local processing for stream pending for consumers, but only
// for removals.
// Lock should not be held.
func (mset *stream) storeUpdates(md, bd int64, seq uint64, subj string) {
	// If we have a single negative update then we will process our consumers for stream pending.
	// Purge and Store handled separately inside individual calls.
	if md == -1 && seq > 0 && subj != _EMPTY_ {
		// We use our consumer list mutex here instead of the main stream lock since it may be held already.
		mset.clsMu.RLock()
		// TODO(dlc) - Do sublist like signaling so we do not have to match?
		for _, o := range mset.cList {
			o.decStreamPending(seq, subj)
		}
		mset.clsMu.RUnlock()
	} else if md < 0 {
		// For batch decrements we need to force consumers to re-calculate num pending.
		mset.clsMu.RLock()
		for _, o := range mset.cList {
			o.streamNumPendingLocked()
		}
		mset.clsMu.RUnlock()
	}

	if mset.jsa != nil {
		mset.jsa.updateUsage(mset.tier, mset.stype, bd)
	}
}

// numMsgIds returns the number of message ids being tracked for duplicate suppression.
func (mset *stream) numMsgIds() int {
	mset.mu.Lock()
	defer mset.mu.Unlock()
	if !mset.ddloaded {
		mset.rebuildDedupe()
	}
	return len(mset.ddmap)
}

// checkMsgId will process and check for duplicates.
// Lock should be held.
func (mset *stream) checkMsgId(id string) *ddentry {
	if !mset.ddloaded {
		mset.rebuildDedupe()
	}
	if id == _EMPTY_ || len(mset.ddmap) == 0 {
		return nil
	}
	return mset.ddmap[id]
}

// Will purge the entries that are past the window.
// Should be called from a timer.
func (mset *stream) purgeMsgIds() {
	mset.mu.Lock()
	defer mset.mu.Unlock()

	now := time.Now().UnixNano()
	tmrNext := mset.cfg.Duplicates
	window := int64(tmrNext)

	for i, dde := range mset.ddarr[mset.ddindex:] {
		if now-dde.ts >= window {
			delete(mset.ddmap, dde.id)
		} else {
			mset.ddindex += i
			// Garbage collect if the live entries are under 1/3 of total capacity.
			if cap(mset.ddarr) > 3*(len(mset.ddarr)-mset.ddindex) {
				mset.ddarr = append([]*ddentry(nil), mset.ddarr[mset.ddindex:]...)
				mset.ddindex = 0
			}
			tmrNext = time.Duration(window - (now - dde.ts))
			break
		}
	}
	if len(mset.ddmap) > 0 {
		// Make sure to not fire too quickly.
		const minFire = 50 * time.Millisecond
		if tmrNext < minFire {
			tmrNext = minFire
		}
		if mset.ddtmr != nil {
			mset.ddtmr.Reset(tmrNext)
		} else {
			mset.ddtmr = time.AfterFunc(tmrNext, mset.purgeMsgIds)
		}
	} else {
		if mset.ddtmr != nil {
			mset.ddtmr.Stop()
			mset.ddtmr = nil
		}
		mset.ddmap = nil
		mset.ddarr = nil
		mset.ddindex = 0
	}
}

// storeMsgId will store the message id for duplicate detection.
func (mset *stream) storeMsgId(dde *ddentry) {
	mset.mu.Lock()
	defer mset.mu.Unlock()
	mset.storeMsgIdLocked(dde)
}

// storeMsgIdLocked will store the message id for duplicate detection.
// Lock should be held.
func (mset *stream) storeMsgIdLocked(dde *ddentry) {
	if mset.ddmap == nil {
		mset.ddmap = make(map[string]*ddentry)
	}
	mset.ddmap[dde.id] = dde
	mset.ddarr = append(mset.ddarr, dde)
	if mset.ddtmr == nil {
		mset.ddtmr = time.AfterFunc(mset.cfg.Duplicates, mset.purgeMsgIds)
	}
}

// Fast lookup of msgId.
func getMsgId(hdr []byte) string {
	return string(getHeader(JSMsgId, hdr))
}

// Fast lookup of expected last msgId.
func getExpectedLastMsgId(hdr []byte) string {
	return string(getHeader(JSExpectedLastMsgId, hdr))
}

// Fast lookup of expected stream.
func getExpectedStream(hdr []byte) string {
	return string(getHeader(JSExpectedStream, hdr))
}

// Fast lookup of expected last sequence.
func getExpectedLastSeq(hdr []byte) (uint64, bool) {
	bseq := getHeader(JSExpectedLastSeq, hdr)
	if len(bseq) == 0 {
		return 0, false
	}
	return uint64(parseInt64(bseq)), true
}
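// Illustrative note on duplicate suppression (stream, id, and sequences
// hypothetical): with Duplicates set to 2 minutes, publishing twice with the
// same Nats-Msg-Id inside that window stores the message once; the second
// publish is acked with the original sequence and flagged as a duplicate:
//
//	// publish 1, header "Nats-Msg-Id: order-42" -> {"stream":"ORDERS","seq":22}
//	// publish 2 within 2m, same id            -> {"stream":"ORDERS","seq":22,"duplicate":true}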
// Fast lookup of rollups.
func getRollup(hdr []byte) string {
	r := getHeader(JSMsgRollup, hdr)
	if len(r) == 0 {
		return _EMPTY_
	}
	return strings.ToLower(string(r))
}

// Fast lookup of expected stream sequence per subject.
func getExpectedLastSeqPerSubject(hdr []byte) (uint64, bool) {
	bseq := getHeader(JSExpectedLastSubjSeq, hdr)
	if len(bseq) == 0 {
		return 0, false
	}
	return uint64(parseInt64(bseq)), true
}

// Signal if we are clustered. Will acquire rlock.
func (mset *stream) IsClustered() bool {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.isClustered()
}

// Lock should be held.
func (mset *stream) isClustered() bool {
	return mset.node != nil
}

// Used if we have to queue things internally to avoid the route/gw path.
type inMsg struct {
	subj string
	rply string
	hdr  []byte
	msg  []byte
	mt   *msgTrace
}

func (mset *stream) queueInbound(ib *ipQueue[*inMsg], subj, rply string, hdr, msg []byte, mt *msgTrace) {
	ib.push(&inMsg{subj, rply, hdr, msg, mt})
}

var dgPool = sync.Pool{
	New: func() any {
		return &directGetReq{}
	},
}

// For when we need to not inline the request.
type directGetReq struct {
	// Copy of this is correct for this.
	req   JSApiMsgGetRequest
	reply string
}

// processDirectGetRequest handles a direct get request for stream messages.
func (mset *stream) processDirectGetRequest(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
	if len(reply) == 0 {
		return
	}
	_, msg := c.msgParts(rmsg)
	if len(msg) == 0 {
		hdr := []byte("NATS/1.0 408 Empty Request\r\n\r\n")
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}
	var req JSApiMsgGetRequest
	err := json.Unmarshal(msg, &req)
	if err != nil {
		hdr := []byte("NATS/1.0 408 Malformed Request\r\n\r\n")
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}
	// Check if nothing is set.
	if req.Seq == 0 && req.LastFor == _EMPTY_ && req.NextFor == _EMPTY_ && len(req.MultiLastFor) == 0 && req.StartTime == nil {
		hdr := []byte("NATS/1.0 408 Empty Request\r\n\r\n")
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}
	// Check that we do not have conflicting options set.
	// We do not allow batch mode for lastFor requests.
	if (req.Seq > 0 && req.LastFor != _EMPTY_) ||
		(req.Seq > 0 && req.StartTime != nil) ||
		(req.StartTime != nil && req.LastFor != _EMPTY_) ||
		(req.LastFor != _EMPTY_ && req.NextFor != _EMPTY_) ||
		(req.LastFor != _EMPTY_ && req.Batch > 0) ||
		(req.LastFor != _EMPTY_ && len(req.MultiLastFor) > 0) ||
		(req.NextFor != _EMPTY_ && len(req.MultiLastFor) > 0) ||
		(req.UpToSeq > 0 && req.UpToTime != nil) {
		hdr := []byte("NATS/1.0 408 Bad Request\r\n\r\n")
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}

	inlineOk := c.kind != ROUTER && c.kind != GATEWAY && c.kind != LEAF
	if !inlineOk {
		dg := dgPool.Get().(*directGetReq)
		dg.req, dg.reply = req, reply
		mset.gets.push(dg)
	} else {
		mset.getDirectRequest(&req, reply)
	}
}
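// Illustrative direct get payloads (JSON field names as assumed from
// JSApiMsgGetRequest's tags; subjects hypothetical):
//
//	{"seq": 22}                                          // exact sequence
//	{"last_by_subj": "orders.1"}                         // last message on a subject
//	{"next_by_subj": "orders.>", "seq": 10, "batch": 5}  // walk forward
//
// Requests that combine mutually exclusive options (e.g. seq together with
// last_by_subj) are rejected above with a 408.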
// This is for direct get by last subject, which is part of the subject itself.
func (mset *stream) processDirectGetLastBySubjectRequest(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
	if len(reply) == 0 {
		return
	}
	_, msg := c.msgParts(rmsg)
	// This version expects no payload.
	if len(msg) != 0 {
		hdr := []byte("NATS/1.0 408 Bad Request\r\n\r\n")
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}
	// Extract the key, i.e. everything past the fifth token of the subject.
	var key string
	for i, n := 0, 0; i < len(subject); i++ {
		if subject[i] == btsep {
			if n == 4 {
				if start := i + 1; start < len(subject) {
					key = subject[start:]
				}
				break
			}
			n++
		}
	}
	if len(key) == 0 {
		hdr := []byte("NATS/1.0 408 Bad Request\r\n\r\n")
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}

	req := JSApiMsgGetRequest{LastFor: key}

	inlineOk := c.kind != ROUTER && c.kind != GATEWAY && c.kind != LEAF
	if !inlineOk {
		dg := dgPool.Get().(*directGetReq)
		dg.req, dg.reply = req, reply
		mset.gets.push(dg)
	} else {
		mset.getDirectRequest(&req, reply)
	}
}

// For direct get batch and multi requests.
const (
	dg   = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\n\r\n"
	dgb  = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Num-Pending: %d\r\nNats-Last-Sequence: %d\r\n\r\n"
	eob  = "NATS/1.0 204 EOB\r\nNats-Num-Pending: %d\r\nNats-Last-Sequence: %d\r\n\r\n"
	eobm = "NATS/1.0 204 EOB\r\nNats-Num-Pending: %d\r\nNats-Last-Sequence: %d\r\nNats-UpTo-Sequence: %d\r\n\r\n"
)

// Handle a multi request.
func (mset *stream) getDirectMulti(req *JSApiMsgGetRequest, reply string) {
	// TODO(dlc) - Make configurable?
	const maxAllowedResponses = 1024

	// We hold the lock here to try to avoid changes out from underneath of us.
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	// Grab store and name.
	store, name, s := mset.store, mset.cfg.Name, mset.srv

	// Grab MaxBytes.
	mb := req.MaxBytes
	if mb == 0 && s != nil {
		// Fill in with the server's MaxPending.
		mb = int(s.opts.MaxPending)
	}

	upToSeq := req.UpToSeq
	// If we have UpToTime set, get the proper sequence.
	if req.UpToTime != nil {
		upToSeq = store.GetSeqFromTime((*req.UpToTime).UTC())
		// We need to back off one since this is normally used to determine the start sequence,
		// whereas here we want it to be the ceiling.
		upToSeq--
	}
	// If not set, set to the last sequence and remember that for EOB.
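	// Illustrative note (sequences hypothetical): GetSeqFromTime returns the
	// first sequence at or after the given time, so backing off one turns it
	// into an inclusive ceiling. If the stream holds sequences 1..10 and
	// UpToTime falls between the timestamps of 7 and 8, GetSeqFromTime
	// returns 8 and upToSeq becomes 7.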
	if upToSeq == 0 {
		var state StreamState
		mset.store.FastState(&state)
		upToSeq = state.LastSeq
	}

	seqs, err := store.MultiLastSeqs(req.MultiLastFor, upToSeq, maxAllowedResponses)
	if err != nil {
		var hdr []byte
		if err == ErrTooManyResults {
			hdr = []byte("NATS/1.0 413 Too Many Results\r\n\r\n")
		} else {
			hdr = []byte(fmt.Sprintf("NATS/1.0 500 %v\r\n\r\n", err))
		}
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}
	if len(seqs) == 0 {
		hdr := []byte("NATS/1.0 404 No Results\r\n\r\n")
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
		return
	}

	np, lseq, sentBytes, sent := uint64(len(seqs)-1), uint64(0), 0, 0
	for _, seq := range seqs {
		if seq < req.Seq {
			if np > 0 {
				np--
			}
			continue
		}
		var svp StoreMsg
		sm, err := store.LoadMsg(seq, &svp)
		if err != nil {
			hdr := []byte("NATS/1.0 404 Message Not Found\r\n\r\n")
			mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
			return
		}

		hdr := sm.hdr
		ts := time.Unix(0, sm.ts).UTC()

		if len(hdr) == 0 {
			hdr = fmt.Appendf(nil, dgb, name, sm.subj, sm.seq, ts.Format(time.RFC3339Nano), np, lseq)
		} else {
			hdr = copyBytes(hdr)
			hdr = genHeader(hdr, JSStream, name)
			hdr = genHeader(hdr, JSSubject, sm.subj)
			hdr = genHeader(hdr, JSSequence, strconv.FormatUint(sm.seq, 10))
			hdr = genHeader(hdr, JSTimeStamp, ts.Format(time.RFC3339Nano))
			hdr = genHeader(hdr, JSNumPending, strconv.FormatUint(np, 10))
			hdr = genHeader(hdr, JSLastSequence, strconv.FormatUint(lseq, 10))
		}
		// Decrement num pending. This is an optimization and we do not continue to look it up for these operations.
		if np > 0 {
			np--
		}
		// Track our lseq.
		lseq = sm.seq
		// Send out our message.
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, sm.msg, nil, 0))
		// Check if we have exceeded max bytes.
		sentBytes += len(sm.subj) + len(sm.hdr) + len(sm.msg)
		if sentBytes >= mb {
			break
		}
		sent++
		if req.Batch > 0 && sent >= req.Batch {
			break
		}
	}

	// Send out EOB.
	hdr := fmt.Appendf(nil, eobm, np, lseq, upToSeq)
	mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
}
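// Illustrative multi-response shape (values hypothetical): each matched
// message is sent with the dgb headers above, then a terminating EOB
// control message such as:
//
//	NATS/1.0 204 EOB
//	Nats-Num-Pending: 0
//	Nats-Last-Sequence: 22
//	Nats-UpTo-Sequence: 40
//
// Nats-UpTo-Sequence gives the client a consistent ceiling from which a
// follow-up request can resume.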
// Do the actual work on a direct msg request.
// This could be called in a Go routine if we are inline for a non-client connection.
func (mset *stream) getDirectRequest(req *JSApiMsgGetRequest, reply string) {
	// Handle multi in a separate function.
	if len(req.MultiLastFor) > 0 {
		mset.getDirectMulti(req, reply)
		return
	}

	mset.mu.RLock()
	store, name, s := mset.store, mset.cfg.Name, mset.srv
	mset.mu.RUnlock()

	var seq uint64
	// Lookup the start seq if StartTime is set.
	if req.StartTime != nil {
		seq = store.GetSeqFromTime(*req.StartTime)
	} else {
		seq = req.Seq
	}

	wc := subjectHasWildcard(req.NextFor)
	// For tracking num pending if we are a batch.
	var np, lseq, validThrough uint64
	var isBatchRequest bool
	batch := req.Batch
	if batch == 0 {
		batch = 1
	} else {
		// This is a batch request, capture the initial numPending.
		isBatchRequest = true
		np, validThrough = store.NumPending(seq, req.NextFor, false)
	}

	// Grab MaxBytes.
	mb := req.MaxBytes
	if mb == 0 && s != nil {
		// Fill in with the server's MaxPending.
		mb = int(s.opts.MaxPending)
	}
	// Track what we have sent.
	var sentBytes int

	// Loop over the batch, which defaults to 1.
	for i := 0; i < batch; i++ {
		var (
			svp StoreMsg
			sm  *StoreMsg
			err error
		)
		if seq > 0 && req.NextFor == _EMPTY_ {
			// Only do a direct lookup for the first in a batch.
			if i == 0 {
				sm, err = store.LoadMsg(seq, &svp)
			} else {
				// We want to use load next with fwcs to step over deleted msgs.
				sm, seq, err = store.LoadNextMsg(fwcs, true, seq, &svp)
			}
			// Bump for the next loop if applicable.
			seq++
		} else if req.NextFor != _EMPTY_ {
			sm, seq, err = store.LoadNextMsg(req.NextFor, wc, seq, &svp)
			seq++
		} else {
			// Batch is not applicable here, this is checked before we get here.
			sm, err = store.LoadLastMsg(req.LastFor, &svp)
		}
		if err != nil {
			// For batches, if we stop early we want to do the EOB logic below.
			if batch > 1 && i > 0 {
				break
			}
			hdr := []byte("NATS/1.0 404 Message Not Found\r\n\r\n")
			mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
			return
		}

		hdr := sm.hdr
		ts := time.Unix(0, sm.ts).UTC()

		if isBatchRequest {
			if len(hdr) == 0 {
				hdr = fmt.Appendf(nil, dgb, name, sm.subj, sm.seq, ts.Format(time.RFC3339Nano), np, lseq)
			} else {
				hdr = copyBytes(hdr)
				hdr = genHeader(hdr, JSStream, name)
				hdr = genHeader(hdr, JSSubject, sm.subj)
				hdr = genHeader(hdr, JSSequence, strconv.FormatUint(sm.seq, 10))
				hdr = genHeader(hdr, JSTimeStamp, ts.Format(time.RFC3339Nano))
				hdr = genHeader(hdr, JSNumPending, strconv.FormatUint(np, 10))
				hdr = genHeader(hdr, JSLastSequence, strconv.FormatUint(lseq, 10))
			}
			// Decrement num pending. This is an optimization and we do not continue to look it up for these operations.
			np--
		} else {
			if len(hdr) == 0 {
				hdr = fmt.Appendf(nil, dg, name, sm.subj, sm.seq, ts.Format(time.RFC3339Nano))
			} else {
				hdr = copyBytes(hdr)
				hdr = genHeader(hdr, JSStream, name)
				hdr = genHeader(hdr, JSSubject, sm.subj)
				hdr = genHeader(hdr, JSSequence, strconv.FormatUint(sm.seq, 10))
				hdr = genHeader(hdr, JSTimeStamp, ts.Format(time.RFC3339Nano))
			}
		}
		// Track our lseq.
		lseq = sm.seq
		// Send out our message.
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, sm.msg, nil, 0))
		// Check if we have exceeded max bytes.
		sentBytes += len(sm.subj) + len(sm.hdr) + len(sm.msg)
		if sentBytes >= mb {
			break
		}
	}

	// If a batch was requested, send EOB.
	if isBatchRequest {
		// Update if the stream's last sequence has moved past our validThrough.
		if mset.lastSeq() > validThrough {
			np, _ = store.NumPending(seq, req.NextFor, false)
		}
		hdr := fmt.Appendf(nil, eob, np, lseq)
		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
	}
}
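// Note on the inbound path below: the subject subscriptions created in
// subscribeToStream() deliver into processInboundJetStreamMsg, which queues
// onto mset.msgs; internalLoop() then drains that queue and hands each
// message to processJetStreamMsg (or the clustered variant), keeping store
// writes off the client read loop.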
// processInboundJetStreamMsg handles processing messages bound for a stream.
func (mset *stream) processInboundJetStreamMsg(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
	hdr, msg := c.msgParts(rmsg)
	// Copy these.
	if len(hdr) > 0 {
		hdr = copyBytes(hdr)
	}
	if len(msg) > 0 {
		msg = copyBytes(msg)
	}
	if mt, traceOnly := c.isMsgTraceEnabled(); mt != nil {
		// If the message is delivered, we need to disable the message trace headers
		// to prevent a trace event from being generated when a stored message
		// is delivered to a consumer and routed.
		if !traceOnly {
			disableTraceHeaders(c, hdr)
		}
		// This will add the jetstream event while in the client read loop.
		// Since the event will be updated in a different go routine, the
		// tracing object will have a separate reference to the JS trace
		// object.
		mt.addJetStreamEvent(mset.name())
	}
	mset.queueInbound(mset.msgs, subject, reply, hdr, msg, c.pa.trace)
}

var (
	errLastSeqMismatch   = errors.New("last sequence mismatch")
	errMsgIdDuplicate    = errors.New("msgid is duplicate")
	errStreamClosed      = errors.New("stream closed")
	errInvalidMsgHandler = errors.New("undefined message handler")
	errStreamMismatch    = errors.New("expected stream does not match")
)

// processJetStreamMsg is where we try to actually process the stream msg.
func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte, lseq uint64, ts int64, mt *msgTrace) (retErr error) {
	if mt != nil {
		// Only the leader/standalone will have mt != nil. On exit, send the
		// message trace event.
		defer func() {
			mt.sendEventFromJetStream(retErr)
		}()
	}

	if mset.closed.Load() {
		return errStreamClosed
	}

	mset.mu.Lock()
	s, store := mset.srv, mset.store

	traceOnly := mt.traceOnly()
	bumpCLFS := func() {
		// Do not bump if tracing and not doing message delivery.
		if traceOnly {
			return
		}
		mset.clMu.Lock()
		mset.clfs++
		mset.clMu.Unlock()
	}

	// Apply the input subject transform, if any.
	if mset.itr != nil {
		ts, err := mset.itr.Match(subject)
		if err == nil {
			// No filtering: if the subject doesn't map to the source of the transform, don't change it.
			subject = ts
		}
	}

	var accName string
	if mset.acc != nil {
		accName = mset.acc.Name
	}

	js, jsa, doAck := mset.js, mset.jsa, !mset.cfg.NoAck
	name, stype := mset.cfg.Name, mset.cfg.Storage
	maxMsgSize := int(mset.cfg.MaxMsgSize)
	numConsumers := len(mset.consumers)
	interestRetention := mset.cfg.Retention == InterestPolicy
	// Snapshot if we are the leader and if we can respond.
	isLeader, isSealed := mset.isLeader(), mset.cfg.Sealed
	canRespond := doAck && len(reply) > 0 && isLeader

	var resp = &JSPubAckResponse{}

	// Bail here if sealed.
	if isSealed {
		outq := mset.outq
		mset.mu.Unlock()
		bumpCLFS()
		if canRespond && outq != nil {
			resp.PubAck = &PubAck{Stream: name}
			resp.Error = ApiErrors[JSStreamSealedErr]
			b, _ := json.Marshal(resp)
			outq.sendMsg(reply, b)
		}
		return ApiErrors[JSStreamSealedErr]
	}

	var buf [256]byte
	pubAck := append(buf[:0], mset.pubAck...)

	// If this is a non-clustered msg and we are not considered active, meaning no active subscription, do not process.
	if lseq == 0 && ts == 0 && !mset.active {
		mset.mu.Unlock()
		return nil
	}

	// For clustering the lower layers will pass our expected lseq. If it is present check for that here.
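	// Illustrative note on the clfs arithmetic (values hypothetical): clfs
	// counts clustered proposals that failed to store, so the raft-level
	// sequence runs ahead of the store by that amount. With mset.lseq == 7
	// and mset.clfs == 2, the next proposal must arrive with lseq == 9, and
	// it will be stored at seq == lseq + 1 - clfs == 8.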
	if lseq > 0 && lseq != (mset.lseq+mset.clfs) {
		isMisMatch := true
		// We may be able to recover here if we have no state whatsoever, or we are a mirror.
		// See if we have to adjust our starting sequence.
		if mset.lseq == 0 || mset.cfg.Mirror != nil {
			var state StreamState
			mset.store.FastState(&state)
			if state.FirstSeq == 0 {
				mset.store.Compact(lseq + 1)
				mset.lseq = lseq
				isMisMatch = false
			}
		}
		// Really is a mismatch.
		if isMisMatch {
			outq := mset.outq
			mset.mu.Unlock()
			if canRespond && outq != nil {
				resp.PubAck = &PubAck{Stream: name}
				resp.Error = ApiErrors[JSStreamSequenceNotMatchErr]
				b, _ := json.Marshal(resp)
				outq.sendMsg(reply, b)
			}
			return errLastSeqMismatch
		}
	}

	// If we have received this message across an account we may have request information attached.
	// For now remove. TODO(dlc) - Should this be opt-in or opt-out?
	if len(hdr) > 0 {
		hdr = removeHeaderIfPresent(hdr, ClientInfoHdr)
	}

	// Process additional msg headers if still present.
	var msgId string
	var rollupSub, rollupAll bool
	isClustered := mset.isClustered()

	if len(hdr) > 0 {
		outq := mset.outq

		// Certain checks have already been performed if in clustered mode, so only check if not.
		// Note, for cluster mode but with message tracing (without message delivery), we need
		// to do this check here since it was not done in processClusteredInboundMsg().
		if !isClustered || traceOnly {
			// Expected stream.
			if sname := getExpectedStream(hdr); sname != _EMPTY_ && sname != name {
				mset.mu.Unlock()
				bumpCLFS()
				if canRespond {
					resp.PubAck = &PubAck{Stream: name}
					resp.Error = NewJSStreamNotMatchError()
					b, _ := json.Marshal(resp)
					outq.sendMsg(reply, b)
				}
				return errStreamMismatch
			}
		}

		// Dedupe detection. This is done at the cluster level for dedupe detection above the
		// lower layers. But we still need to pull out the msgId.
		if msgId = getMsgId(hdr); msgId != _EMPTY_ {
			// Do the real check only if not clustered or the traceOnly flag is set.
			if !isClustered || traceOnly {
				if dde := mset.checkMsgId(msgId); dde != nil {
					mset.mu.Unlock()
					bumpCLFS()
					if canRespond {
						response := append(pubAck, strconv.FormatUint(dde.seq, 10)...)
						response = append(response, ",\"duplicate\": true}"...)
						outq.sendMsg(reply, response)
					}
					return errMsgIdDuplicate
				}
			}
		}

		// Expected last sequence per subject.
		// If we are clustered we have prechecked seq > 0.
		if seq, exists := getExpectedLastSeqPerSubject(hdr); exists {
			// TODO(dlc) - We could make a new store func that does this all in one.
			var smv StoreMsg
			var fseq uint64
			sm, err := store.LoadLastMsg(subject, &smv)
			if sm != nil {
				fseq = sm.seq
			}
			if err == ErrStoreMsgNotFound && seq == 0 {
				fseq, err = 0, nil
			}
			if err != nil || fseq != seq {
				mset.mu.Unlock()
				bumpCLFS()
				if canRespond {
					resp.PubAck = &PubAck{Stream: name}
					resp.Error = NewJSStreamWrongLastSequenceError(fseq)
					b, _ := json.Marshal(resp)
					outq.sendMsg(reply, b)
				}
				return fmt.Errorf("last sequence by subject mismatch: %d vs %d", seq, fseq)
			}
		}
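		// Illustrative note (header value hypothetical): the per-subject check
		// above is what enables KV-style compare-and-set. A publish carrying
		//
		//	Nats-Expected-Last-Subject-Sequence: 3
		//
		// succeeds only if the last message stored on this subject is still
		// sequence 3; otherwise the caller gets a wrong-last-sequence error.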
		// Expected last sequence.
		if seq, exists := getExpectedLastSeq(hdr); exists && seq != mset.lseq {
			mlseq := mset.lseq
			mset.mu.Unlock()
			bumpCLFS()
			if canRespond {
				resp.PubAck = &PubAck{Stream: name}
				resp.Error = NewJSStreamWrongLastSequenceError(mlseq)
				b, _ := json.Marshal(resp)
				outq.sendMsg(reply, b)
			}
			return fmt.Errorf("last sequence mismatch: %d vs %d", seq, mlseq)
		}
		// Expected last msgId.
		if lmsgId := getExpectedLastMsgId(hdr); lmsgId != _EMPTY_ {
			if mset.lmsgId == _EMPTY_ && !mset.ddloaded {
				mset.rebuildDedupe()
			}
			if lmsgId != mset.lmsgId {
				last := mset.lmsgId
				mset.mu.Unlock()
				bumpCLFS()
				if canRespond {
					resp.PubAck = &PubAck{Stream: name}
					resp.Error = NewJSStreamWrongLastMsgIDError(last)
					b, _ := json.Marshal(resp)
					outq.sendMsg(reply, b)
				}
				return fmt.Errorf("last msgid mismatch: %q vs %q", lmsgId, last)
			}
		}
		// Check for any rollups.
		if rollup := getRollup(hdr); rollup != _EMPTY_ {
			if !mset.cfg.AllowRollup || mset.cfg.DenyPurge {
				mset.mu.Unlock()
				bumpCLFS()
				if canRespond {
					resp.PubAck = &PubAck{Stream: name}
					resp.Error = NewJSStreamRollupFailedError(errors.New("rollup not permitted"))
					b, _ := json.Marshal(resp)
					outq.sendMsg(reply, b)
				}
				return errors.New("rollup not permitted")
			}
			switch rollup {
			case JSMsgRollupSubject:
				rollupSub = true
			case JSMsgRollupAll:
				rollupAll = true
			default:
				mset.mu.Unlock()
				bumpCLFS()
				err := fmt.Errorf("rollup value invalid: %q", rollup)
				if canRespond {
					resp.PubAck = &PubAck{Stream: name}
					resp.Error = NewJSStreamRollupFailedError(err)
					b, _ := json.Marshal(resp)
					outq.sendMsg(reply, b)
				}
				return err
			}
		}
	}

	// Response Ack.
	var (
		response []byte
		seq      uint64
		err      error
	)

	// Check to see if we are over the max msg size.
	if maxMsgSize >= 0 && (len(hdr)+len(msg)) > maxMsgSize {
		mset.mu.Unlock()
		bumpCLFS()
		if canRespond {
			resp.PubAck = &PubAck{Stream: name}
			resp.Error = NewJSStreamMessageExceedsMaximumError()
			response, _ = json.Marshal(resp)
			mset.outq.sendMsg(reply, response)
		}
		return ErrMaxPayload
	}

	if len(hdr) > math.MaxUint16 {
		mset.mu.Unlock()
		bumpCLFS()
		if canRespond {
			resp.PubAck = &PubAck{Stream: name}
			resp.Error = NewJSStreamHeaderExceedsMaximumError()
			response, _ = json.Marshal(resp)
			mset.outq.sendMsg(reply, response)
		}
		return ErrMaxPayload
	}

	// Check to see if we have exceeded our limits.
	if js.limitsExceeded(stype) {
		s.resourcesExceededError()
		mset.mu.Unlock()
		bumpCLFS()
		if canRespond {
			resp.PubAck = &PubAck{Stream: name}
			resp.Error = NewJSInsufficientResourcesError()
			response, _ = json.Marshal(resp)
			mset.outq.sendMsg(reply, response)
		}
		// Stepdown regardless.
		if node := mset.raftNode(); node != nil {
			node.StepDown()
		}
		return NewJSInsufficientResourcesError()
	}

	var noInterest bool

	// If we are interest based retention and have no consumers then we can skip.
	if interestRetention {
		if numConsumers == 0 {
			noInterest = true
		} else if mset.numFilter > 0 {
			// Assume no interest and check to disqualify.
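			// Illustrative example (subjects hypothetical): with a single
			// consumer filtered on "orders.eu.>", a message published to
			// "orders.us.1" matches no filter, noInterest stays true, and the
			// message is skipped below rather than stored.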
			noInterest = true
			mset.clsMu.RLock()
			for _, o := range mset.cList {
				if o.cfg.FilterSubject == _EMPTY_ || subjectIsSubsetMatch(subject, o.cfg.FilterSubject) {
					noInterest = false
					break
				}
			}
			mset.clsMu.RUnlock()
		}
	}

	// Grab the timestamp if not already set.
	if ts == 0 && lseq > 0 {
		ts = time.Now().UnixNano()
	}

	mt.updateJetStreamEvent(subject, noInterest)
	if traceOnly {
		mset.mu.Unlock()
		return nil
	}

	// Skip the msg here.
	if noInterest {
		mset.lseq = store.SkipMsg()
		mset.lmsgId = msgId
		// If we have a msgId make sure to save.
		if msgId != _EMPTY_ {
			mset.storeMsgIdLocked(&ddentry{msgId, seq, ts})
		}
		if canRespond {
			response = append(pubAck, strconv.FormatUint(mset.lseq, 10)...)
			response = append(response, '}')
			mset.outq.sendMsg(reply, response)
		}
		mset.mu.Unlock()
		return nil
	}

	// If here we will attempt to store the message.
	// Assume this will succeed.
	olmsgId := mset.lmsgId
	mset.lmsgId = msgId
	clfs := mset.clfs
	mset.lseq++
	tierName := mset.tier

	// Republish state if needed.
	var tsubj string
	var tlseq uint64
	var thdrsOnly bool
	if mset.tr != nil {
		tsubj, _ = mset.tr.Match(subject)
		if mset.cfg.RePublish != nil {
			thdrsOnly = mset.cfg.RePublish.HeadersOnly
		}
	}
	republish := tsubj != _EMPTY_ && isLeader

	// If we are republishing, grab the last sequence for this exact subject. Aids in gap detection for lightweight clients.
	if republish {
		var smv StoreMsg
		if sm, _ := store.LoadLastMsg(subject, &smv); sm != nil {
			tlseq = sm.seq
		}
	}

	// If clustered, this was already checked and we do not want to check here and possibly introduce skew.
	if !isClustered {
		if exceeded, err := jsa.wouldExceedLimits(stype, tierName, mset.cfg.Replicas, subject, hdr, msg); exceeded {
			if err == nil {
				err = NewJSAccountResourcesExceededError()
			}
			s.RateLimitWarnf("JetStream resource limits exceeded for account: %q", accName)
			if canRespond {
				resp.PubAck = &PubAck{Stream: name}
				resp.Error = err
				response, _ = json.Marshal(resp)
				mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, nil, response, nil, 0))
			}
		}
	}

	// Store the actual msg.
	if lseq == 0 && ts == 0 {
		seq, ts, err = store.StoreMsg(subject, hdr, msg)
	} else {
		// Make sure to take into account any message assignments that we had to skip (clfs).
		seq = lseq + 1 - clfs
		// Check for preAcks and the need to skip vs store.
		if mset.hasAllPreAcks(seq, subject) {
			mset.clearAllPreAcks(seq)
			store.SkipMsg()
		} else {
			err = store.StoreRawMsg(subject, hdr, msg, seq, ts)
		}
	}

	if err != nil {
		// If we did not succeed, put those values back and increment clfs in case we are clustered.
		var state StreamState
		mset.store.FastState(&state)
		mset.lseq = state.LastSeq
		mset.lmsgId = olmsgId
		mset.mu.Unlock()
		bumpCLFS()

		switch err {
		case ErrMaxMsgs, ErrMaxBytes, ErrMaxMsgsPerSubject, ErrMsgTooLarge:
			s.RateLimitDebugf("JetStream failed to store a msg on stream '%s > %s': %v", accName, name, err)
		case ErrStoreClosed:
		default:
			s.Errorf("JetStream failed to store a msg on stream '%s > %s': %v", accName, name, err)
		}

		if canRespond {
			resp.PubAck = &PubAck{Stream: name}
			resp.Error = NewJSStreamStoreFailedError(err, Unless(err))
			response, _ = json.Marshal(resp)
			mset.outq.sendMsg(reply, response)
		}
		return err
	}

	// If we have a msgId make sure to save.
	// This will replace our estimate from the cluster layer if we are clustered.
	if msgId != _EMPTY_ {
		if isClustered && isLeader && mset.ddmap != nil {
			if dde := mset.ddmap[msgId]; dde != nil {
				dde.seq, dde.ts = seq, ts
			} else {
				mset.storeMsgIdLocked(&ddentry{msgId, seq, ts})
			}
		} else {
			// R1 or not the leader.
			mset.storeMsgIdLocked(&ddentry{msgId, seq, ts})
		}
	}

	// If here we succeeded in storing the message.
	mset.mu.Unlock()

	// No errors, this is the normal path.
	if rollupSub {
		mset.purge(&JSApiStreamPurgeRequest{Subject: subject, Keep: 1})
	} else if rollupAll {
		mset.purge(&JSApiStreamPurgeRequest{Keep: 1})
	}

	// Check for republish.
	if republish {
		tsStr := time.Unix(0, ts).UTC().Format(time.RFC3339Nano)
		var rpMsg []byte
		if len(hdr) == 0 {
			const ht = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\n\r\n"
			const htho = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\nNats-Msg-Size: %d\r\n\r\n"
			if !thdrsOnly {
				hdr = fmt.Appendf(nil, ht, name, subject, seq, tsStr, tlseq)
				rpMsg = copyBytes(msg)
			} else {
				hdr = fmt.Appendf(nil, htho, name, subject, seq, tsStr, tlseq, len(msg))
			}
		} else {
			// Slow path.
			hdr = genHeader(hdr, JSStream, name)
			hdr = genHeader(hdr, JSSubject, subject)
			hdr = genHeader(hdr, JSSequence, strconv.FormatUint(seq, 10))
			hdr = genHeader(hdr, JSTimeStamp, tsStr)
			hdr = genHeader(hdr, JSLastSequence, strconv.FormatUint(tlseq, 10))
			if !thdrsOnly {
				rpMsg = copyBytes(msg)
			} else {
				hdr = genHeader(hdr, JSMsgSize, strconv.Itoa(len(msg)))
			}
		}
		mset.outq.send(newJSPubMsg(tsubj, _EMPTY_, _EMPTY_, copyBytes(hdr), rpMsg, nil, seq))
	}

	// Send the response here.
	if canRespond {
		response = append(pubAck, strconv.FormatUint(seq, 10)...)
		response = append(response, '}')
		mset.outq.sendMsg(reply, response)
	}

	// Signal consumers for new messages.
	if numConsumers > 0 {
		mset.sigq.push(newCMsg(subject, seq))
		select {
		case mset.sch <- struct{}{}:
		default:
		}
	}

	return nil
}

// Used to signal an inbound message to registered consumers.
type cMsg struct {
	seq  uint64
	subj string
}

// Pool to recycle consumer bound msgs.
var cMsgPool sync.Pool

// Used to queue up consumer bound msgs for signaling.
func newCMsg(subj string, seq uint64) *cMsg {
	var m *cMsg
	cm := cMsgPool.Get()
	if cm != nil {
		m = cm.(*cMsg)
	} else {
		m = new(cMsg)
	}
	m.subj, m.seq = subj, seq

	return m
}

func (m *cMsg) returnToPool() {
	if m == nil {
		return
	}
	m.subj, m.seq = _EMPTY_, 0
	cMsgPool.Put(m)
}

// Go routine to signal consumers.
// Offloaded from stream msg processing.
func (mset *stream) signalConsumersLoop() {
	mset.mu.RLock()
	s, qch, sch, msgs := mset.srv, mset.qch, mset.sch, mset.sigq
	mset.mu.RUnlock()

	for {
		select {
		case <-s.quitCh:
			return
		case <-qch:
			return
		case <-sch:
			cms := msgs.pop()
			for _, m := range cms {
				seq, subj := m.seq, m.subj
				m.returnToPool()
				// Signal all appropriate consumers.
				mset.signalConsumers(subj, seq)
			}
			msgs.recycle(&cms)
		}
	}
}

// This will update and signal all consumers that match.
func (mset *stream) signalConsumers(subj string, seq uint64) {
	mset.clsMu.RLock()
	if mset.csl == nil {
		mset.clsMu.RUnlock()
		return
	}
	r := mset.csl.Match(subj)
	mset.clsMu.RUnlock()

	if len(r.psubs) == 0 {
		return
	}
	// Encode the sequence here.
	var eseq [8]byte
	var le = binary.LittleEndian
	le.PutUint64(eseq[:], seq)
	msg := eseq[:]
	for _, sub := range r.psubs {
		sub.icb(sub, nil, nil, subj, _EMPTY_, msg)
	}
}

// Internal message for use by the jetstream subsystem.
type jsPubMsg struct {
	dsubj string // Subject to send to, e.g. _INBOX.xxx
	reply string
	StoreMsg
	o *consumer
}

var jsPubMsgPool sync.Pool

func newJSPubMsg(dsubj, subj, reply string, hdr, msg []byte, o *consumer, seq uint64) *jsPubMsg {
	var m *jsPubMsg
	var buf []byte
	pm := jsPubMsgPool.Get()
	if pm != nil {
		m = pm.(*jsPubMsg)
		buf = m.buf[:0]
	} else {
		m = new(jsPubMsg)
	}
	// When getting something from a pool it is critical that all fields are
	// initialized. Doing it this way guarantees that if someone adds a field to
	// the structure, the compiler will fail the build if this line is not updated.
	(*m) = jsPubMsg{dsubj, reply, StoreMsg{subj, hdr, msg, buf, seq, 0}, o}

	return m
}

// Gets a jsPubMsg from the pool.
func getJSPubMsgFromPool() *jsPubMsg {
	pm := jsPubMsgPool.Get()
	if pm != nil {
		return pm.(*jsPubMsg)
	}
	return new(jsPubMsg)
}

func (pm *jsPubMsg) returnToPool() {
	if pm == nil {
		return
	}
	pm.subj, pm.dsubj, pm.reply, pm.hdr, pm.msg, pm.o = _EMPTY_, _EMPTY_, _EMPTY_, nil, nil, nil
	if len(pm.buf) > 0 {
		pm.buf = pm.buf[:0]
	}
	jsPubMsgPool.Put(pm)
}

func (pm *jsPubMsg) size() int {
	if pm == nil {
		return 0
	}
	return len(pm.dsubj) + len(pm.reply) + len(pm.hdr) + len(pm.msg)
}
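// Note on the send queue defined below: producers throughout this file
// enqueue *jsPubMsg via outq.send/sendMsg, and internalLoop() drains the
// queue onto an internal client connection, so stream processing never
// blocks on socket writes.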
// Queue of *jsPubMsg for sending internal system messages.
type jsOutQ struct {
	*ipQueue[*jsPubMsg]
}

func (q *jsOutQ) sendMsg(subj string, msg []byte) {
	if q != nil {
		q.send(newJSPubMsg(subj, _EMPTY_, _EMPTY_, nil, msg, nil, 0))
	}
}

func (q *jsOutQ) send(msg *jsPubMsg) {
	if q == nil || msg == nil {
		return
	}
	q.push(msg)
}

func (q *jsOutQ) unregister() {
	if q == nil {
		return
	}
	q.ipQueue.unregister()
}

// StoredMsg is for raw access to messages in a stream.
type StoredMsg struct {
	Subject  string    `json:"subject"`
	Sequence uint64    `json:"seq"`
	Header   []byte    `json:"hdrs,omitempty"`
	Data     []byte    `json:"data,omitempty"`
	Time     time.Time `json:"time"`
}

// This is similar to the system semantics, but we did not want to overload the single system sendq,
// or require a system account when doing a simple setup with jetstream.
func (mset *stream) setupSendCapabilities() {
	mset.mu.Lock()
	defer mset.mu.Unlock()
	if mset.outq != nil {
		return
	}
	qname := fmt.Sprintf("[ACC:%s] stream '%s' sendQ", mset.acc.Name, mset.cfg.Name)
	mset.outq = &jsOutQ{newIPQueue[*jsPubMsg](mset.srv, qname)}
	go mset.internalLoop()
}

// Returns the associated account name.
func (mset *stream) accName() string {
	if mset == nil {
		return _EMPTY_
	}
	mset.mu.RLock()
	acc := mset.acc
	mset.mu.RUnlock()
	return acc.Name
}

// Name returns the stream name.
func (mset *stream) name() string {
	if mset == nil {
		return _EMPTY_
	}
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.cfg.Name
}

func (mset *stream) internalLoop() {
	mset.mu.RLock()
	s := mset.srv
	c := s.createInternalJetStreamClient()
	c.registerWithAccount(mset.acc)
	defer c.closeConnection(ClientClosed)
	outq, qch, msgs, gets := mset.outq, mset.qch, mset.msgs, mset.gets

	// For the ack msgs queue for interest retention.
	var (
		amch chan struct{}
		ackq *ipQueue[uint64]
	)
	if mset.ackq != nil {
		ackq, amch = mset.ackq, mset.ackq.ch
	}
	mset.mu.RUnlock()

	// Raw scratch buffer.
	// This should be rarely used now so can be smaller.
	var _r [1024]byte

	// To optimize for not converting a string to a []byte slice.
	var (
		subj  [256]byte
		dsubj [256]byte
		rply  [256]byte
		szb   [10]byte
		hdb   [10]byte
	)

	for {
		select {
		case <-outq.ch:
			pms := outq.pop()
			for _, pm := range pms {
				c.pa.subject = append(dsubj[:0], pm.dsubj...)
				c.pa.deliver = append(subj[:0], pm.subj...)
				c.pa.size = len(pm.msg) + len(pm.hdr)
				c.pa.szb = append(szb[:0], strconv.Itoa(c.pa.size)...)
				if len(pm.reply) > 0 {
					c.pa.reply = append(rply[:0], pm.reply...)
				} else {
					c.pa.reply = nil
				}

				// If we have an underlying buf that is the wire contents for hdr + msg, use it, else construct on the fly.
				var msg []byte
				if len(pm.buf) > 0 {
					msg = pm.buf
				} else {
					if len(pm.hdr) > 0 {
						msg = pm.hdr
						if len(pm.msg) > 0 {
							msg = _r[:0]
							msg = append(msg, pm.hdr...)
							msg = append(msg, pm.msg...)
						}
					} else if len(pm.msg) > 0 {
						// We own this now from a low level buffer perspective so can use directly here.
						msg = pm.msg
					}
				}

				if len(pm.hdr) > 0 {
					c.pa.hdr = len(pm.hdr)
					c.pa.hdb = append(hdb[:0], strconv.Itoa(c.pa.hdr)...)
				} else {
					c.pa.hdr = -1
					c.pa.hdb = nil
				}

				msg = append(msg, _CRLF_...)

				didDeliver, _ := c.processInboundClientMsg(msg)
				c.pa.szb, c.pa.subject, c.pa.deliver = nil, nil, nil

				// Check to see if this is a delivery for a consumer and
				// we failed to deliver the message. If so alert the consumer.
				if pm.o != nil && pm.seq > 0 && !didDeliver {
					pm.o.didNotDeliver(pm.seq, pm.dsubj)
				}
				pm.returnToPool()
			}
			// TODO: Move in the for-loop?
			c.flushClients(0)
			outq.recycle(&pms)
		case <-msgs.ch:
			// This can possibly change now so needs to be checked here.
			isClustered := mset.IsClustered()
			ims := msgs.pop()
			for _, im := range ims {
				// If we are clustered we need to propose this message to the underlying raft group.
				if isClustered {
					mset.processClusteredInboundMsg(im.subj, im.rply, im.hdr, im.msg, im.mt)
				} else {
					mset.processJetStreamMsg(im.subj, im.rply, im.hdr, im.msg, 0, 0, im.mt)
				}
			}
			msgs.recycle(&ims)
		case <-gets.ch:
			dgs := gets.pop()
			for _, dg := range dgs {
				mset.getDirectRequest(&dg.req, dg.reply)
				dgPool.Put(dg)
			}
			gets.recycle(&dgs)

		case <-amch:
			seqs := ackq.pop()
			for _, seq := range seqs {
				mset.ackMsg(nil, seq)
			}
			ackq.recycle(&seqs)
		case <-qch:
			return
		case <-s.quitCh:
			return
		}
	}
}

// Used to break consumers out of their monitorConsumer go routines.
func (mset *stream) resetAndWaitOnConsumers() {
	mset.mu.RLock()
	consumers := make([]*consumer, 0, len(mset.consumers))
	for _, o := range mset.consumers {
		consumers = append(consumers, o)
	}
	mset.mu.RUnlock()

	for _, o := range consumers {
		if node := o.raftNode(); node != nil {
			if o.IsLeader() {
				node.StepDown()
			}
			node.Delete()
		}
		if o.isMonitorRunning() {
			o.monitorWg.Wait()
		}
	}
}

// Internal function to delete a stream.
func (mset *stream) delete() error {
	if mset == nil {
		return nil
	}
	return mset.stop(true, true)
}

// Internal function to stop or delete the stream.
func (mset *stream) stop(deleteFlag, advisory bool) error {
	mset.mu.RLock()
	js, jsa, name := mset.js, mset.jsa, mset.cfg.Name
	mset.mu.RUnlock()

	if jsa == nil {
		return NewJSNotEnabledForAccountError()
	}

	// Remove from our account map first.
	jsa.mu.Lock()
	delete(jsa.streams, name)
	accName := jsa.account.Name
	jsa.mu.Unlock()

	// Kick the monitor and collect consumers first.
	mset.mu.Lock()
	// Signal to the monitor loop.
	// Can't use qch here.
	if mset.mqch != nil {
		close(mset.mqch)
		mset.mqch = nil
	}

	// Stop responding to sync requests.
	mset.stopClusterSubs()
	// Unsubscribe from the direct stream.
	mset.unsubscribeToStream(true)

	// Our info sub if we spun it up.
	if mset.infoSub != nil {
		mset.srv.sysUnsubscribe(mset.infoSub)
		mset.infoSub = nil
	}

	// Clean up consumers.
	var obs []*consumer
	for _, o := range mset.consumers {
		obs = append(obs, o)
	}
	mset.clsMu.Lock()
	mset.consumers, mset.cList, mset.csl = nil, nil, nil
	mset.clsMu.Unlock()

	// Check if we are a mirror.
	if mset.mirror != nil && mset.mirror.sub != nil {
		mset.unsubscribe(mset.mirror.sub)
		mset.mirror.sub = nil
		mset.removeInternalConsumer(mset.mirror)
	}
	// Now check for sources.
	if len(mset.sources) > 0 {
		for _, si := range mset.sources {
			mset.cancelSourceConsumer(si.iname)
		}
	}
	mset.mu.Unlock()

	isShuttingDown := js.isShuttingDown()
	for _, o := range obs {
		if !o.isClosed() {
			// Third flag says do not broadcast a signal.
			// TODO(dlc) - If we have an err here we don't want to stop
			// but should we log?
			o.stopWithFlags(deleteFlag, deleteFlag, false, advisory)
			if !isShuttingDown {
				o.monitorWg.Wait()
			}
		}
	}

	mset.mu.Lock()
	// Send the stream delete advisory after the consumers.
	if deleteFlag && advisory {
		mset.sendDeleteAdvisoryLocked()
	}

	// Mark closed.
	mset.closed.Store(true)

	// Quit channel, do this after sending the delete advisory.
	if mset.qch != nil {
		close(mset.qch)
		mset.qch = nil
	}

	// Cluster cleanup.
	var sa *streamAssignment
	if n := mset.node; n != nil {
		if deleteFlag {
			n.Delete()
			sa = mset.sa
		} else {
			// Always attempt a snapshot on clean exit.
			n.InstallSnapshot(mset.stateSnapshotLocked())
			n.Stop()
		}
	}

	// Cleanup the duplicates timer if running.
	if mset.ddtmr != nil {
		mset.ddtmr.Stop()
		mset.ddtmr = nil
		mset.ddmap = nil
		mset.ddarr = nil
		mset.ddindex = 0
	}

	sysc := mset.sysc
	mset.sysc = nil

	if deleteFlag {
		// Unregistering ipQueues does not prevent them from push/pop,
		// it just removes them from the central monitoring map.
		mset.msgs.unregister()
		mset.ackq.unregister()
		mset.outq.unregister()
		mset.sigq.unregister()
	}

	// Snapshot the store and client.
	store := mset.store
	c := mset.client

	// Clustered cleanup.
	mset.mu.Unlock()

	// Check if the stream assignment has the group node specified.
	// We need this cleared in case the stream gets reassigned here.
	if sa != nil {
		js.mu.Lock()
		if sa.Group != nil {
			sa.Group.node = nil
		}
		js.mu.Unlock()
	}

	if c != nil {
		c.closeConnection(ClientClosed)
	}

	if sysc != nil {
		sysc.closeConnection(ClientClosed)
	}

	if deleteFlag {
		if store != nil {
			// Ignore errors.
			store.Delete()
		}
		// Release any resources.
		js.releaseStreamResources(&mset.cfg)
		// Cleanup directories after the stream.
		accDir := filepath.Join(js.config.StoreDir, accName)
		// Do cleanup in a separate go routine similar to how fs will use purge here.
		go func() {
			// No-op if not empty.
			os.Remove(filepath.Join(accDir, streamsDir))
			os.Remove(accDir)
		}()
	} else if store != nil {
		// Ignore errors.
		store.Stop()
	}

	return nil
}
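// Note on the two shutdown paths above: delete() maps to stop(true, true),
// which removes state, raft groups, and on-disk directories and sends
// advisories, while a plain server shutdown uses stop(false, ...) so the
// store (and a final raft snapshot) is preserved for restart.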
func (mset *stream) getMsg(seq uint64) (*StoredMsg, error) {
	var smv StoreMsg
	sm, err := mset.store.LoadMsg(seq, &smv)
	if err != nil {
		return nil, err
	}
	// This is only used in tests directly so no need to pool etc.
	return &StoredMsg{
		Subject:  sm.subj,
		Sequence: sm.seq,
		Header:   sm.hdr,
		Data:     sm.msg,
		Time:     time.Unix(0, sm.ts).UTC(),
	}, nil
}

// getConsumers will return a copy of all the current consumers for this stream.
func (mset *stream) getConsumers() []*consumer {
	mset.clsMu.RLock()
	defer mset.clsMu.RUnlock()
	return append([]*consumer(nil), mset.cList...)
}

// Lock should be held for this one.
func (mset *stream) numPublicConsumers() int {
	return len(mset.consumers) - mset.directs
}

// This returns all consumers that are not DIRECT.
func (mset *stream) getPublicConsumers() []*consumer {
	mset.clsMu.RLock()
	defer mset.clsMu.RUnlock()

	var obs []*consumer
	for _, o := range mset.cList {
		if !o.cfg.Direct {
			obs = append(obs, o)
		}
	}
	return obs
}

// Will check for interest retention and make sure messages
// that have been acked are processed and removed.
// This will check the ack floors of all consumers, and adjust our first sequence accordingly.
func (mset *stream) checkInterestState() {
	if mset == nil || !mset.isInterestRetention() {
		// If we are limits-based there is nothing to do.
		return
	}

	var zeroAcks []*consumer
	var lowAckFloor uint64 = math.MaxUint64
	consumers := mset.getConsumers()

	for _, o := range consumers {
		o.checkStateForInterestStream()

		o.mu.Lock()
		if o.isLeader() {
			// We need to account for consumers with an ack floor of zero.
			// We will collect them and see if we need to check pending below.
			if o.asflr == 0 {
				zeroAcks = append(zeroAcks, o)
			} else if o.asflr < lowAckFloor {
				lowAckFloor = o.asflr
			}
		} else {
			// We are a follower so only have the store state, so read that in.
			state, err := o.store.State()
			if err != nil {
				// On error we will not have enough information to process correctly so bail.
				o.mu.Unlock()
				return
			}
			// We need to account for consumers with an ack floor of zero.
			if state.AckFloor.Stream == 0 {
				zeroAcks = append(zeroAcks, o)
			} else if state.AckFloor.Stream < lowAckFloor {
				lowAckFloor = state.AckFloor.Stream
			}
			// We are a follower here, but if we detect a drift from when we were previously the leader, correct it here.
			if o.asflr > state.AckFloor.Stream || o.sseq > state.Delivered.Stream+1 {
				o.applyState(state)
			}
		}
		o.mu.Unlock()
	}

	// If nothing was set we can bail.
	if lowAckFloor == math.MaxUint64 {
		return
	}

	// Hold the stream write lock in case we need to purge.
	mset.mu.Lock()
	defer mset.mu.Unlock()

	// Capture our current state.
	var state StreamState
	mset.store.FastState(&state)

	if lowAckFloor < math.MaxUint64 && lowAckFloor > state.FirstSeq {
		// Check if we had any zeroAcks, we will need to check them.
		for _, o := range zeroAcks {
			var np uint64
			o.mu.RLock()
			if o.isLeader() {
				np = uint64(o.numPending())
			} else {
				np, _ = o.calculateNumPending()
			}
			o.mu.RUnlock()
			// This means we have pending and can not remove anything at this time.
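			// Illustrative example (floors hypothetical): with consumers at
			// ack floors 10 and 15 plus one at floor 0, we may purge through
			// sequence 10 only if the floor-0 consumer has nothing pending,
			// i.e. it has simply never had a message to deliver.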
			if np > 0 {
				return
			}
		}
		if lowAckFloor <= state.LastSeq {
			// Purge the stream up to the lowest ack floor + 1.
			mset.store.PurgeEx(_EMPTY_, lowAckFloor+1, 0)
		} else {
			// Here we have a low ack floor higher than our last seq.
			// So we will just do a normal purge.
			mset.store.Purge()
		}
	}
	// Make sure to reset our local lseq.
	mset.store.FastState(&state)
	mset.lseq = state.LastSeq
	// Also make sure we clear any pending preAcks.
	mset.clearAllPreAcksBelowFloor(state.FirstSeq)
}

func (mset *stream) isInterestRetention() bool {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.cfg.Retention != LimitsPolicy
}

// numConsumers reports on the number of active consumers for this stream.
func (mset *stream) numConsumers() int {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return len(mset.consumers)
}

// Lock should be held.
func (mset *stream) setConsumer(o *consumer) {
	mset.consumers[o.name] = o
	if len(o.subjf) > 0 {
		mset.numFilter++
	}
	if o.cfg.Direct {
		mset.directs++
	}
	// Now update the consumers list as well.
	mset.clsMu.Lock()
	mset.cList = append(mset.cList, o)
	mset.clsMu.Unlock()
}

// Lock should be held.
func (mset *stream) removeConsumer(o *consumer) {
	if o.cfg.FilterSubject != _EMPTY_ && mset.numFilter > 0 {
		mset.numFilter--
	}
	if o.cfg.Direct && mset.directs > 0 {
		mset.directs--
	}
	if mset.consumers != nil {
		delete(mset.consumers, o.name)
		// Now update the consumers list as well.
		mset.clsMu.Lock()
		for i, ol := range mset.cList {
			if ol == o {
				mset.cList = append(mset.cList[:i], mset.cList[i+1:]...)
				break
			}
		}
		// Always remove from the leader sublist.
		if mset.csl != nil {
			for _, sub := range o.signalSubs() {
				mset.csl.Remove(sub)
			}
		}
		mset.clsMu.Unlock()
	}
}

// Set the consumer as a leader. This will update the signaling sublist.
func (mset *stream) setConsumerAsLeader(o *consumer) {
	mset.clsMu.Lock()
	defer mset.clsMu.Unlock()

	if mset.csl == nil {
		mset.csl = NewSublistWithCache()
	}
	for _, sub := range o.signalSubs() {
		mset.csl.Insert(sub)
	}
}

// Remove the consumer as a leader. This will update the signaling sublist.
func (mset *stream) removeConsumerAsLeader(o *consumer) {
	mset.clsMu.Lock()
	defer mset.clsMu.Unlock()
	if mset.csl != nil {
		for _, sub := range o.signalSubs() {
			mset.csl.Remove(sub)
		}
	}
}

// swapSigSubs will update the signal subs for a new subject filter.
// The consumer lock should not be held.
func (mset *stream) swapSigSubs(o *consumer, newFilters []string) {
	mset.clsMu.Lock()
	o.mu.Lock()

	if o.closed || o.mset == nil {
		o.mu.Unlock()
		mset.clsMu.Unlock()
		return
	}

	if o.sigSubs != nil {
		if mset.csl != nil {
			for _, sub := range o.sigSubs {
				mset.csl.Remove(sub)
			}
		}
		o.sigSubs = nil
	}

	if o.isLeader() {
		if mset.csl == nil {
			mset.csl = NewSublistWithCache()
		}
		// If no filters are present, add fwcs to the sublist for that consumer.
5648 		if newFilters == nil {
5649 			sub := &subscription{subject: []byte(fwcs), icb: o.processStreamSignal}
5650 			mset.csl.Insert(sub)
5651 			o.sigSubs = append(o.sigSubs, sub)
5652 			// If there are filters, add their subjects to the sublist.
5653 		} else {
5654 			for _, filter := range newFilters {
5655 				sub := &subscription{subject: []byte(filter), icb: o.processStreamSignal}
5656 				mset.csl.Insert(sub)
5657 				o.sigSubs = append(o.sigSubs, sub)
5658 			}
5659 		}
5660 	}
5661 	o.mu.Unlock()
5662 	mset.clsMu.Unlock()
5663 
5664 	mset.mu.Lock()
5665 	defer mset.mu.Unlock()
5666 
5667 	if mset.numFilter > 0 && len(o.subjf) > 0 {
5668 		mset.numFilter--
5669 	}
5670 	if len(newFilters) > 0 {
5671 		mset.numFilter++
5672 	}
5673 }
5674 
5675 // lookupConsumer will retrieve a consumer by name.
5676 func (mset *stream) lookupConsumer(name string) *consumer {
5677 	mset.mu.RLock()
5678 	defer mset.mu.RUnlock()
5679 	return mset.consumers[name]
5680 }
5681 
5682 func (mset *stream) numDirectConsumers() (num int) {
5683 	mset.clsMu.RLock()
5684 	defer mset.clsMu.RUnlock()
5685 
5686 	// Consumers that are direct are not recorded at the store level.
5687 	for _, o := range mset.cList {
5688 		o.mu.RLock()
5689 		if o.cfg.Direct {
5690 			num++
5691 		}
5692 		o.mu.RUnlock()
5693 	}
5694 	return num
5695 }
5696 
5697 // State will return the current state for this stream.
5698 func (mset *stream) state() StreamState {
5699 	return mset.stateWithDetail(false)
5700 }
5701 
5702 func (mset *stream) stateWithDetail(details bool) StreamState {
5703 	// mset.store does not change once set, so ok to reference here directly.
5704 	// We do this elsewhere as well.
5705 	store := mset.store
5706 	if store == nil {
5707 		return StreamState{}
5708 	}
5709 
5710 	// Currently rely on store for details.
5711 	if details {
5712 		return store.State()
5713 	}
5714 	// Here we do the fast version.
5715 	var state StreamState
5716 	store.FastState(&state)
5717 	return state
5718 }
5719 
5720 func (mset *stream) Store() StreamStore {
5721 	mset.mu.RLock()
5722 	defer mset.mu.RUnlock()
5723 	return mset.store
5724 }
5725 
5726 // Determines if the new proposed partition is unique amongst all consumers.
5727 // Lock should be held.
5728 func (mset *stream) partitionUnique(name string, partitions []string) bool {
5729 	for _, partition := range partitions {
5730 		for n, o := range mset.consumers {
5731 			// Skip the consumer being checked.
5732 			if n == name {
5733 				continue
5734 			}
5735 			if o.subjf == nil {
5736 				return false
5737 			}
5738 			for _, filter := range o.subjf {
5739 				if SubjectsCollide(partition, filter.subject) {
5740 					return false
5741 				}
5742 			}
5743 		}
5744 	}
5745 	return true
5746 }
5747 
5748 // Lock should be held.
5749 func (mset *stream) potentialFilteredConsumers() bool {
5750 	numSubjects := len(mset.cfg.Subjects)
5751 	if len(mset.consumers) == 0 || numSubjects == 0 {
5752 		return false
5753 	}
5754 	if numSubjects > 1 || subjectHasWildcard(mset.cfg.Subjects[0]) {
5755 		return true
5756 	}
5757 	return false
5758 }
5759 
5760 // Check if there is no interest in this sequence number across our consumers.
5761 // The consumer passed is optional if we are processing the ack for that consumer.
5762 // Write lock should be held.
5763 func (mset *stream) noInterest(seq uint64, obs *consumer) bool {
5764 	return !mset.checkForInterest(seq, obs)
5765 }
5766 
5767 // Check if there is no interest in this sequence number and subject across our consumers.
5768 // The consumer passed is optional if we are processing the ack for that consumer.
5769 // Write lock should be held.
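// Unlike checkForInterest below, the subject is already known here, so no
// message load from the store is needed before checking filtered consumers.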
5770 func (mset *stream) noInterestWithSubject(seq uint64, subj string, obs *consumer) bool {
5771 	return !mset.checkForInterestWithSubject(seq, subj, obs)
5772 }
5773 
5774 // Write lock should be held here for the stream to avoid race conditions on state.
5775 func (mset *stream) checkForInterest(seq uint64, obs *consumer) bool {
5776 	var subj string
5777 	if mset.potentialFilteredConsumers() {
5778 		pmsg := getJSPubMsgFromPool()
5779 		defer pmsg.returnToPool()
5780 		sm, err := mset.store.LoadMsg(seq, &pmsg.StoreMsg)
5781 		if err != nil {
5782 			if err == ErrStoreEOF {
5783 				// Register this as a preAck.
5784 				mset.registerPreAck(obs, seq)
5785 				return true
5786 			}
5787 			mset.clearAllPreAcks(seq)
5788 			return false
5789 		}
5790 		subj = sm.subj
5791 	}
5792 	return mset.checkForInterestWithSubject(seq, subj, obs)
5793 }
5794 
5795 // Checks for interest given a sequence and subject.
5796 func (mset *stream) checkForInterestWithSubject(seq uint64, subj string, obs *consumer) bool {
5797 	for _, o := range mset.consumers {
5798 		// If this is us or we have a registered preAck for this consumer, continue inspecting.
5799 		if o == obs || mset.hasPreAck(o, seq) {
5800 			continue
5801 		}
5802 		// Check if we need an ack.
5803 		if o.needAck(seq, subj) {
5804 			return true
5805 		}
5806 	}
5807 	mset.clearAllPreAcks(seq)
5808 	return false
5809 }
5810 
5811 // Check if we have a pre-registered ack for this sequence.
5812 // Write lock should be held.
5813 func (mset *stream) hasPreAck(o *consumer, seq uint64) bool {
5814 	if o == nil || len(mset.preAcks) == 0 {
5815 		return false
5816 	}
5817 	consumers := mset.preAcks[seq]
5818 	if len(consumers) == 0 {
5819 		return false
5820 	}
5821 	_, found := consumers[o]
5822 	return found
5823 }
5824 
5825 // Check if we have all consumers pre-acked for this sequence and subject.
5826 // Write lock should be held.
5827 func (mset *stream) hasAllPreAcks(seq uint64, subj string) bool {
5828 	if len(mset.preAcks) == 0 || len(mset.preAcks[seq]) == 0 {
5829 		return false
5830 	}
5831 	// Since these can be filtered and mutually exclusive,
5832 	// if we have some preAcks we need to check all interest here.
5833 	return mset.noInterestWithSubject(seq, subj, nil)
5834 }
5835 
5836 // Clear all preAcks for this sequence.
5837 // Write lock should be held.
5838 func (mset *stream) clearAllPreAcks(seq uint64) {
5839 	delete(mset.preAcks, seq)
5840 }
5841 
5842 // Clear all preAcks below the floor.
5843 // Write lock should be held.
5844 func (mset *stream) clearAllPreAcksBelowFloor(floor uint64) {
5845 	for seq := range mset.preAcks {
5846 		if seq < floor {
5847 			delete(mset.preAcks, seq)
5848 		}
5849 	}
5850 }
5851 
5852 // This will register an ack for a consumer if it arrives before the actual message.
5853 func (mset *stream) registerPreAckLock(o *consumer, seq uint64) {
5854 	mset.mu.Lock()
5855 	defer mset.mu.Unlock()
5856 	mset.registerPreAck(o, seq)
5857 }
5858 
5859 // This will register an ack for a consumer if it arrives before
5860 // the actual message.
5861 // Write lock should be held.
5862 func (mset *stream) registerPreAck(o *consumer, seq uint64) {
5863 	if o == nil {
5864 		return
5865 	}
5866 	if mset.preAcks == nil {
5867 		mset.preAcks = make(map[uint64]map[*consumer]struct{})
5868 	}
5869 	if mset.preAcks[seq] == nil {
5870 		mset.preAcks[seq] = make(map[*consumer]struct{})
5871 	}
5872 	mset.preAcks[seq][o] = struct{}{}
5873 }
5874 
5875 // This will clear an ack for a consumer.
5876 // Write lock should be held.
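// When the last consumer entry for the sequence is removed, the map entry
// for that sequence is dropped as well.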
5877 func (mset *stream) clearPreAck(o *consumer, seq uint64) {
5878 	if o == nil || len(mset.preAcks) == 0 {
5879 		return
5880 	}
5881 	if consumers := mset.preAcks[seq]; len(consumers) > 0 {
5882 		delete(consumers, o)
5883 		if len(consumers) == 0 {
5884 			delete(mset.preAcks, seq)
5885 		}
5886 	}
5887 }
5888 
5889 // ackMsg is called from a consumer when we have a WorkQueue or Interest retention policy.
5890 func (mset *stream) ackMsg(o *consumer, seq uint64) {
5891 	if seq == 0 {
5892 		return
5893 	}
5894 
5895 	// Don't make this RLock(). We need to have only 1 running at a time to gauge interest across all consumers.
5896 	mset.mu.Lock()
5897 	if mset.closed.Load() || mset.cfg.Retention == LimitsPolicy {
5898 		mset.mu.Unlock()
5899 		return
5900 	}
5901 
5902 	store := mset.store
5903 	var state StreamState
5904 	store.FastState(&state)
5905 
5906 	// If this has arrived before we have processed the message itself.
5907 	if seq > state.LastSeq {
5908 		mset.registerPreAck(o, seq)
5909 		mset.mu.Unlock()
5910 		return
5911 	}
5912 
5913 	// Always clear the pre-ack if here.
5914 	mset.clearPreAck(o, seq)
5915 
5916 	// Make sure this sequence is not below our first sequence.
5917 	if seq < state.FirstSeq {
5918 		mset.mu.Unlock()
5919 		return
5920 	}
5921 
5922 	var shouldRemove bool
5923 	switch mset.cfg.Retention {
5924 	case WorkQueuePolicy:
5925 		// Normally we just remove a message when it's ack'd here, but if we have direct consumers
5926 		// from sources and/or mirrors we need to make sure they have delivered the msg.
5927 		shouldRemove = mset.directs <= 0 || mset.noInterest(seq, o)
5928 	case InterestPolicy:
5929 		shouldRemove = mset.noInterest(seq, o)
5930 	}
5931 	mset.mu.Unlock()
5932 
5933 	// If nothing else to do.
5934 	if !shouldRemove {
5935 		return
5936 	}
5937 
5938 	// If we are here we should attempt to remove.
5939 	if _, err := store.RemoveMsg(seq); err == ErrStoreEOF {
5940 		// This should not happen, but being pedantic.
5941 		mset.registerPreAckLock(o, seq)
5942 	}
5943 }
5944 
5945 // Snapshot creates a snapshot for the stream and possibly consumers.
5946 func (mset *stream) snapshot(deadline time.Duration, checkMsgs, includeConsumers bool) (*SnapshotResult, error) {
5947 	if mset.closed.Load() {
5948 		return nil, errStreamClosed
5949 	}
5950 	store := mset.store
5951 	return store.Snapshot(deadline, checkMsgs, includeConsumers)
5952 }
5953 
5954 const snapsDir = "__snapshots__"
5955 
5956 // RestoreStream will restore a stream from a snapshot.
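// The reader is expected to provide an S2-compressed tar archive, the same
// format produced by snapshot above.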
5957 func (a *Account) RestoreStream(ncfg *StreamConfig, r io.Reader) (*stream, error) {
5958 	if ncfg == nil {
5959 		return nil, errors.New("nil config on stream restore")
5960 	}
5961 
5962 	s, jsa, err := a.checkForJetStream()
5963 	if err != nil {
5964 		return nil, err
5965 	}
5966 
5967 	cfg, apiErr := s.checkStreamCfg(ncfg, a)
5968 	if apiErr != nil {
5969 		return nil, apiErr
5970 	}
5971 
5972 	sd := filepath.Join(jsa.storeDir, snapsDir)
5973 	if _, err := os.Stat(sd); os.IsNotExist(err) {
5974 		if err := os.MkdirAll(sd, defaultDirPerms); err != nil {
5975 			return nil, fmt.Errorf("could not create snapshots directory - %v", err)
5976 		}
5977 	}
5978 	sdir, err := os.MkdirTemp(sd, "snap-")
5979 	if err != nil {
5980 		return nil, err
5981 	}
5982 	if _, err := os.Stat(sdir); os.IsNotExist(err) {
5983 		if err := os.MkdirAll(sdir, defaultDirPerms); err != nil {
5984 			return nil, fmt.Errorf("could not create snapshots directory - %v", err)
5985 		}
5986 	}
5987 	defer os.RemoveAll(sdir)
5988 
5989 	logAndReturnError := func() error {
5990 		a.mu.RLock()
5991 		err := fmt.Errorf("unexpected content (account=%s)", a.Name)
5992 		if a.srv != nil {
5993 			a.srv.Errorf("Stream restore failed due to %v", err)
5994 		}
5995 		a.mu.RUnlock()
5996 		return err
5997 	}
5998 	sdirCheck := filepath.Clean(sdir) + string(os.PathSeparator)
5999 
6000 	tr := tar.NewReader(s2.NewReader(r))
6001 	for {
6002 		hdr, err := tr.Next()
6003 		if err == io.EOF {
6004 			break // End of snapshot
6005 		}
6006 		if err != nil {
6007 			return nil, err
6008 		}
6009 		if hdr.Typeflag != tar.TypeReg {
6010 			return nil, logAndReturnError()
6011 		}
6012 		fpath := filepath.Join(sdir, filepath.Clean(hdr.Name))
6013 		if !strings.HasPrefix(fpath, sdirCheck) {
6014 			return nil, logAndReturnError()
6015 		}
6016 		os.MkdirAll(filepath.Dir(fpath), defaultDirPerms)
6017 		fd, err := os.OpenFile(fpath, os.O_CREATE|os.O_RDWR, 0600)
6018 		if err != nil {
6019 			return nil, err
6020 		}
6021 		_, err = io.Copy(fd, tr)
6022 		fd.Close()
6023 		if err != nil {
6024 			return nil, err
6025 		}
6026 	}
6027 
6028 	// Check metadata.
6029 	// The cfg passed in will be the new identity for the stream.
6030 	var fcfg FileStreamInfo
6031 	b, err := os.ReadFile(filepath.Join(sdir, JetStreamMetaFile))
6032 	if err != nil {
6033 		return nil, err
6034 	}
6035 	if err := json.Unmarshal(b, &fcfg); err != nil {
6036 		return nil, err
6037 	}
6038 
6039 	// Check to make sure names match.
6040 	if fcfg.Name != cfg.Name {
6041 		return nil, errors.New("stream names do not match")
6042 	}
6043 
6044 	// See if this stream already exists.
6045 	if _, err := a.lookupStream(cfg.Name); err == nil {
6046 		return nil, NewJSStreamNameExistRestoreFailedError()
6047 	}
6048 	// Move into the correct place here.
6049 	ndir := filepath.Join(jsa.storeDir, streamsDir, cfg.Name)
6050 	// Remove old one if for some reason it is still here.
6051 	if _, err := os.Stat(ndir); err == nil {
6052 		os.RemoveAll(ndir)
6053 	}
6054 	// Make sure our destination streams directory exists.
6055 	if err := os.MkdirAll(filepath.Join(jsa.storeDir, streamsDir), defaultDirPerms); err != nil {
6056 		return nil, err
6057 	}
6058 	// Move into new location.
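	// The rename should be atomic here: the staging and destination directories
	// both live under the JetStream store directory, normally on the same filesystem.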
6059 	if err := os.Rename(sdir, ndir); err != nil {
6060 		return nil, err
6061 	}
6062 
6063 	if cfg.Template != _EMPTY_ {
6064 		if err := jsa.addStreamNameToTemplate(cfg.Template, cfg.Name); err != nil {
6065 			return nil, err
6066 		}
6067 	}
6068 	mset, err := a.addStream(&cfg)
6069 	if err != nil {
6070 		return nil, err
6071 	}
6072 	if !fcfg.Created.IsZero() {
6073 		mset.setCreatedTime(fcfg.Created)
6074 	}
6075 	lseq := mset.lastSeq()
6076 
6077 	// Make sure we do an update if the configs have changed.
6078 	if !reflect.DeepEqual(fcfg.StreamConfig, cfg) {
6079 		if err := mset.update(&cfg); err != nil {
6080 			return nil, err
6081 		}
6082 	}
6083 
6084 	// Now do consumers.
6085 	odir := filepath.Join(ndir, consumerDir)
6086 	ofis, _ := os.ReadDir(odir)
6087 	for _, ofi := range ofis {
6088 		metafile := filepath.Join(odir, ofi.Name(), JetStreamMetaFile)
6089 		metasum := filepath.Join(odir, ofi.Name(), JetStreamMetaFileSum)
6090 		if _, err := os.Stat(metafile); os.IsNotExist(err) {
6091 			mset.stop(true, false)
6092 			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
6093 		}
6094 		buf, err := os.ReadFile(metafile)
6095 		if err != nil {
6096 			mset.stop(true, false)
6097 			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
6098 		}
6099 		if _, err := os.Stat(metasum); os.IsNotExist(err) {
6100 			mset.stop(true, false)
6101 			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
6102 		}
6103 		var cfg FileConsumerInfo
6104 		if err := json.Unmarshal(buf, &cfg); err != nil {
6105 			mset.stop(true, false)
6106 			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
6107 		}
6108 		isEphemeral := !isDurableConsumer(&cfg.ConsumerConfig)
6109 		if isEphemeral {
6110 			// This is an ephemeral consumer and could fail on restart until
6111 			// the consumer can reconnect. We will create it as a durable and switch it.
6112 			cfg.ConsumerConfig.Durable = ofi.Name()
6113 		}
6114 		obs, err := mset.addConsumer(&cfg.ConsumerConfig)
6115 		if err != nil {
6116 			mset.stop(true, false)
6117 			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
6118 		}
6119 		if isEphemeral {
6120 			obs.switchToEphemeral()
6121 		}
6122 		if !cfg.Created.IsZero() {
6123 			obs.setCreatedTime(cfg.Created)
6124 		}
6125 		obs.mu.Lock()
6126 		err = obs.readStoredState(lseq)
6127 		obs.mu.Unlock()
6128 		if err != nil {
6129 			mset.stop(true, false)
6130 			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
6131 		}
6132 	}
6133 	return mset, nil
6134 }
6135 
6136 // This is to check for dangling messages on interest retention streams. Only called on account enable.
6137 // Issue https://github.com/nats-io/nats-server/issues/3612
6138 func (mset *stream) checkForOrphanMsgs() {
6139 	mset.mu.RLock()
6140 	consumers := make([]*consumer, 0, len(mset.consumers))
6141 	for _, o := range mset.consumers {
6142 		consumers = append(consumers, o)
6143 	}
6144 	accName, stream := mset.acc.Name, mset.cfg.Name
6145 
6146 	var ss StreamState
6147 	mset.store.FastState(&ss)
6148 	mset.mu.RUnlock()
6149 
6150 	for _, o := range consumers {
6151 		if err := o.checkStateForInterestStream(); err == errAckFloorHigherThanLastSeq {
6152 			o.mu.RLock()
6153 			s, consumer := o.srv, o.name
6154 			state, _ := o.store.State()
6155 			asflr := state.AckFloor.Stream
6156 			o.mu.RUnlock()
6157 			// Warn about stream state vs our ack floor.
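			// The warning is rate limited so that many affected consumers will not flood the log.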
6158 			s.RateLimitWarnf("Detected consumer '%s > %s > %s' ack floor %d is ahead of stream's last sequence %d",
6159 				accName, stream, consumer, asflr, ss.LastSeq)
6160 		}
6161 	}
6162 }
6163 
6164 // Check on startup to make sure that consumer replication matches ours.
6165 // Interest retention requires that replication matches.
6166 func (mset *stream) checkConsumerReplication() {
6167 	mset.mu.RLock()
6168 	defer mset.mu.RUnlock()
6169 
6170 	if mset.cfg.Retention != InterestPolicy {
6171 		return
6172 	}
6173 
6174 	s, acc := mset.srv, mset.acc
6175 	for _, o := range mset.consumers {
6176 		o.mu.RLock()
6177 		// Consumer replicas of 0 can be a legitimate config, and we will inherit from the stream
6178 		// when this is the case.
6179 		if mset.cfg.Replicas != o.cfg.Replicas && o.cfg.Replicas != 0 {
6180 			s.Errorf("consumer '%s > %s > %s' MUST match replication (%d vs %d) of stream with interest policy",
6181 				acc, mset.cfg.Name, o.cfg.Name, mset.cfg.Replicas, o.cfg.Replicas)
6182 		}
6183 		o.mu.RUnlock()
6184 	}
6185 }
6186 
6187 // Will check if we are already running in the monitor and, if not, set the appropriate flag.
6188 func (mset *stream) checkInMonitor() bool {
6189 	mset.mu.Lock()
6190 	defer mset.mu.Unlock()
6191 
6192 	if mset.inMonitor {
6193 		return true
6194 	}
6195 	mset.inMonitor = true
6196 	return false
6197 }
6198 
6199 // Clear us being in the monitor routine.
6200 func (mset *stream) clearMonitorRunning() {
6201 	mset.mu.Lock()
6202 	defer mset.mu.Unlock()
6203 	mset.inMonitor = false
6204 }
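// An illustrative sketch of how the two guards above are meant to compose
// in a monitor goroutine; the surrounding monitor body is assumed and not
// part of this file:
//
//	if mset.checkInMonitor() {
//		return // Another monitor is already running.
//	}
//	defer mset.clearMonitorRunning()
//	// ... run the monitor loop ...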