// Copyright 2019-2024 The NATS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package server

import (
	"archive/tar"
	"bytes"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math"
	"math/rand"
	"os"
	"path/filepath"
	"reflect"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/klauspost/compress/s2"
	"github.com/nats-io/nuid"
)

// StreamConfig will determine the name, subjects and retention policy
// for a given stream. If subjects is empty the name will be used.
type StreamConfig struct {
	Name         string           `json:"name"`
	Description  string           `json:"description,omitempty"`
	Subjects     []string         `json:"subjects,omitempty"`
	Retention    RetentionPolicy  `json:"retention"`
	MaxConsumers int              `json:"max_consumers"`
	MaxMsgs      int64            `json:"max_msgs"`
	MaxBytes     int64            `json:"max_bytes"`
	MaxAge       time.Duration    `json:"max_age"`
	MaxMsgsPer   int64            `json:"max_msgs_per_subject"`
	MaxMsgSize   int32            `json:"max_msg_size,omitempty"`
	Discard      DiscardPolicy    `json:"discard"`
	Storage      StorageType      `json:"storage"`
	Replicas     int              `json:"num_replicas"`
	NoAck        bool             `json:"no_ack,omitempty"`
	Template     string           `json:"template_owner,omitempty"`
	Duplicates   time.Duration    `json:"duplicate_window,omitempty"`
	Placement    *Placement       `json:"placement,omitempty"`
	Mirror       *StreamSource    `json:"mirror,omitempty"`
	Sources      []*StreamSource  `json:"sources,omitempty"`
	Compression  StoreCompression `json:"compression"`
	FirstSeq     uint64           `json:"first_seq,omitempty"`

	// Allow applying a subject transform to incoming messages before doing anything else.
	SubjectTransform *SubjectTransformConfig `json:"subject_transform,omitempty"`

	// Allow republish of the message after being sequenced and stored.
	RePublish *RePublish `json:"republish,omitempty"`

	// Allow higher performance, direct access to get individual messages. E.g. KeyValue.
	AllowDirect bool `json:"allow_direct"`
	// Allow higher performance and unified direct access for mirrors as well.
	MirrorDirect bool `json:"mirror_direct"`

	// Allow KV like semantics to also discard new on a per subject basis.
	DiscardNewPer bool `json:"discard_new_per_subject,omitempty"`

	// Optional qualifiers. These can not be modified after set to true.

	// Sealed will seal a stream so no messages can get out or in.
	Sealed bool `json:"sealed"`
	// DenyDelete will restrict the ability to delete messages.
	DenyDelete bool `json:"deny_delete"`
	// DenyPurge will restrict the ability to purge messages.
	DenyPurge bool `json:"deny_purge"`
	// AllowRollup allows messages to be placed into the system and to purge
	// all older messages using a special msg header.
	AllowRollup bool `json:"allow_rollup_hdrs"`

	// The following defaults will apply to consumers when created against
	// this stream, unless overridden manually.
	// TODO(nat): Can/should we name these better?
	ConsumerLimits StreamConsumerLimits `json:"consumer_limits"`

	// Metadata is additional metadata for the Stream.
	Metadata map[string]string `json:"metadata,omitempty"`
}

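// Illustrative sketch (not part of the original source): a minimal KV-style
// stream configuration. All field values here are hypothetical and shown only
// to make the shape of StreamConfig concrete.
//
//	cfg := &StreamConfig{
//		Name:        "ORDERS",
//		Subjects:    []string{"orders.>"},
//		Retention:   LimitsPolicy,
//		MaxMsgsPer:  1,    // keep only the latest message per subject
//		Discard:     DiscardOld,
//		Storage:     FileStorage,
//		Replicas:    3,
//		AllowDirect: true, // enable direct gets, e.g. for KV reads
//	}
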
type StreamConsumerLimits struct {
	InactiveThreshold time.Duration `json:"inactive_threshold,omitempty"`
	MaxAckPending     int           `json:"max_ack_pending,omitempty"`
}

// SubjectTransformConfig is for applying a subject transform (to matching
// messages) before doing anything else when a new message is received.
type SubjectTransformConfig struct {
	Source      string `json:"src"`
	Destination string `json:"dest"`
}

// RePublish is for republishing messages once committed to a stream.
type RePublish struct {
	Source      string `json:"src,omitempty"`
	Destination string `json:"dest"`
	HeadersOnly bool   `json:"headers_only,omitempty"`
}

// JSPubAckResponse is a formal response to a publish operation.
type JSPubAckResponse struct {
	Error *ApiError `json:"error,omitempty"`
	*PubAck
}

// ToError checks if the response has an error and if it does converts it to an error,
// avoiding the pitfalls described by https://yourbasic.org/golang/gotcha-why-nil-error-not-equal-nil/
func (r *JSPubAckResponse) ToError() error {
	if r.Error == nil {
		return nil
	}
	return r.Error
}

// PubAck is the detail you get back from a publish to a stream that was successful.
// e.g. +OK {"stream": "Orders", "seq": 22}
type PubAck struct {
	Stream    string `json:"stream"`
	Sequence  uint64 `json:"seq"`
	Domain    string `json:"domain,omitempty"`
	Duplicate bool   `json:"duplicate,omitempty"`
}

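// Illustrative sketch (hypothetical caller, not part of this file): returning
// r.Error directly from a function declared to return error would produce a
// non-nil interface value even when r.Error is a nil *ApiError; ToError
// avoids that typed-nil gotcha.
//
//	var resp JSPubAckResponse
//	_ = json.Unmarshal(payload, &resp)
//	if err := resp.ToError(); err != nil {
//		// handle the publish failure
//	}
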
// StreamInfo shows config and current state for this stream.
type StreamInfo struct {
	Config     StreamConfig        `json:"config"`
	Created    time.Time           `json:"created"`
	State      StreamState         `json:"state"`
	Domain     string              `json:"domain,omitempty"`
	Cluster    *ClusterInfo        `json:"cluster,omitempty"`
	Mirror     *StreamSourceInfo   `json:"mirror,omitempty"`
	Sources    []*StreamSourceInfo `json:"sources,omitempty"`
	Alternates []StreamAlternate   `json:"alternates,omitempty"`
	// TimeStamp indicates when the info was gathered.
	TimeStamp time.Time `json:"ts"`
}

type StreamAlternate struct {
	Name    string `json:"name"`
	Domain  string `json:"domain,omitempty"`
	Cluster string `json:"cluster"`
}

// ClusterInfo shows information about the underlying set of servers
// that make up the stream or consumer.
type ClusterInfo struct {
	Name      string      `json:"name,omitempty"`
	RaftGroup string      `json:"raft_group,omitempty"`
	Leader    string      `json:"leader,omitempty"`
	Replicas  []*PeerInfo `json:"replicas,omitempty"`
}

// PeerInfo shows information about all the peers in the cluster that
// are supporting the stream or consumer.
type PeerInfo struct {
	Name    string        `json:"name"`
	Current bool          `json:"current"`
	Offline bool          `json:"offline,omitempty"`
	Active  time.Duration `json:"active"`
	Lag     uint64        `json:"lag,omitempty"`
	Peer    string        `json:"peer"`
	// For migrations.
	cluster string
}

// StreamSourceInfo shows information about an upstream stream source.
type StreamSourceInfo struct {
	Name              string                   `json:"name"`
	External          *ExternalStream          `json:"external,omitempty"`
	Lag               uint64                   `json:"lag"`
	Active            time.Duration            `json:"active"`
	Error             *ApiError                `json:"error,omitempty"`
	FilterSubject     string                   `json:"filter_subject,omitempty"`
	SubjectTransforms []SubjectTransformConfig `json:"subject_transforms,omitempty"`
}

// StreamSource dictates how streams can source from other streams.
type StreamSource struct {
	Name              string                   `json:"name"`
	OptStartSeq       uint64                   `json:"opt_start_seq,omitempty"`
	OptStartTime      *time.Time               `json:"opt_start_time,omitempty"`
	FilterSubject     string                   `json:"filter_subject,omitempty"`
	SubjectTransforms []SubjectTransformConfig `json:"subject_transforms,omitempty"`
	External          *ExternalStream          `json:"external,omitempty"`

	// Internal
	iname string // For indexing when stream names are the same for multiple sources.
}

// ExternalStream allows you to qualify access to a stream source in another account.
type ExternalStream struct {
	ApiPrefix     string `json:"api"`
	DeliverPrefix string `json:"deliver"`
}

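// Illustrative sketch (hypothetical values): sourcing from a stream that
// lives in another account or domain, qualified by API and deliver prefixes.
// The prefixes shown are assumptions for illustration only.
//
//	src := &StreamSource{
//		Name:          "WEATHER",
//		FilterSubject: "weather.us.>",
//		External: &ExternalStream{
//			ApiPrefix:     "$JS.hub.API",
//			DeliverPrefix: "deliver.hub",
//		},
//	}
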
// Stream is a JetStream stream of messages. When we receive a message internally destined
// for a Stream we will direct link from the client to this structure.
type stream struct {
	mu        sync.RWMutex
	js        *jetStream
	jsa       *jsAccount
	acc       *Account
	srv       *Server
	client    *client
	sysc      *client
	sid       atomic.Uint64
	pubAck    []byte
	outq      *jsOutQ
	msgs      *ipQueue[*inMsg]
	gets      *ipQueue[*directGetReq]
	store     StreamStore
	ackq      *ipQueue[uint64]
	lseq      uint64
	lmsgId    string
	consumers map[string]*consumer
	numFilter int // number of filtered consumers
	cfg       StreamConfig
	created   time.Time
	stype     StorageType
	tier      string
	ddmap     map[string]*ddentry
	ddarr     []*ddentry
	ddindex   int
	ddtmr     *time.Timer
	qch       chan struct{}
	mqch      chan struct{}
	active    bool
	ddloaded  bool
	closed    atomic.Bool

	// Mirror
	mirror *sourceInfo

	// Sources
	sources              map[string]*sourceInfo
	sourceRetries        map[string]*time.Timer
	sourcesConsumerSetup *time.Timer

	// Indicates we have direct consumers.
	directs int

	// For input subject transform.
	itr *subjectTransform

	// For republishing.
	tr *subjectTransform

	// For processing consumers without main stream lock.
	clsMu sync.RWMutex
	cList []*consumer
	sch   chan struct{}
	sigq  *ipQueue[*cMsg]
	csl   *Sublist // Consumer Sublist
	// Leader will store seq/msgTrace in clustering mode. Used in applyStreamEntries
	// to know if trace event should be sent after processing.
	mt map[uint64]*msgTrace

	// For non limits policy streams when they process an ack before the actual msg.
	// Can happen in stretch clusters, multi-cloud, or during catchup for a restarted server.
	preAcks map[uint64]map[*consumer]struct{}

	// TODO(dlc) - Hide everything below behind two pointers.
	// Clustered mode.
	sa         *streamAssignment
	node       RaftNode
	catchup    atomic.Bool
	syncSub    *subscription
	infoSub    *subscription
	clMu       sync.Mutex
	clseq      uint64
	clfs       uint64
	inflight   map[uint64]uint64
	leader     string
	lqsent     time.Time
	catchups   map[string]uint64
	uch        chan struct{}
	compressOK bool
	inMonitor  bool

	// Direct get subscriptions.
	directSub *subscription
	lastBySub *subscription

	monitorWg sync.WaitGroup
}

type sourceInfo struct {
	name  string
	iname string
	cname string
	sub   *subscription
	dsub  *subscription
	lbsub *subscription
	msgs  *ipQueue[*inMsg]
	sseq  uint64
	dseq  uint64
	start time.Time
	lag   uint64
	err   *ApiError
	fails int
	last  time.Time
	lreq  time.Time
	qch   chan struct{}
	sip   bool // setup in progress
	wg    sync.WaitGroup
	sf    string              // subject filter
	sfs   []string            // subject filters
	trs   []*subjectTransform // subject transforms
}

// For mirrors and direct get.
const (
	dgetGroup          = sysGroup
	dgetCaughtUpThresh = 10
)

// Headers for published messages.
const (
	JSMsgId               = "Nats-Msg-Id"
	JSExpectedStream      = "Nats-Expected-Stream"
	JSExpectedLastSeq     = "Nats-Expected-Last-Sequence"
	JSExpectedLastSubjSeq = "Nats-Expected-Last-Subject-Sequence"
	JSExpectedLastMsgId   = "Nats-Expected-Last-Msg-Id"
	JSStreamSource        = "Nats-Stream-Source"
	JSLastConsumerSeq     = "Nats-Last-Consumer"
	JSLastStreamSeq       = "Nats-Last-Stream"
	JSConsumerStalled     = "Nats-Consumer-Stalled"
	JSMsgRollup           = "Nats-Rollup"
	JSMsgSize             = "Nats-Msg-Size"
	JSResponseType        = "Nats-Response-Type"
)

// Headers for republished messages and direct gets.
const (
	JSStream       = "Nats-Stream"
	JSSequence     = "Nats-Sequence"
	JSTimeStamp    = "Nats-Time-Stamp"
	JSSubject      = "Nats-Subject"
	JSLastSequence = "Nats-Last-Sequence"
	JSNumPending   = "Nats-Num-Pending"
	JSUpToSequence = "Nats-UpTo-Sequence"
)

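// Illustrative sketch (hypothetical client-side publish, using the nats.go
// client): how the deduplication and optimistic-concurrency headers above
// might be set. The subject and values are assumptions for illustration.
//
//	m := nats.NewMsg("orders.new")
//	m.Header.Set(JSMsgId, "order-12345")     // dedupe within the duplicate window
//	m.Header.Set(JSExpectedStream, "ORDERS") // reject if the subject maps to another stream
//	m.Header.Set(JSExpectedLastSeq, "41")    // reject unless the last stream sequence is 41
//	m.Data = []byte(`{"id": 12345}`)
//	// ack, err := js.PublishMsg(m)
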
// Rollups, can be subject only or all messages.
const (
	JSMsgRollupSubject = "sub"
	JSMsgRollupAll     = "all"
)

const (
	jsCreateResponse = "create"
)

// Dedupe entry.
type ddentry struct {
	id  string
	seq uint64
	ts  int64
}

// Replicas Range
const StreamMaxReplicas = 5

// AddStream adds a stream for the given account.
func (a *Account) addStream(config *StreamConfig) (*stream, error) {
	return a.addStreamWithAssignment(config, nil, nil)
}

// AddStreamWithStore adds a stream for the given account with custom store config options.
func (a *Account) addStreamWithStore(config *StreamConfig, fsConfig *FileStoreConfig) (*stream, error) {
	return a.addStreamWithAssignment(config, fsConfig, nil)
}

func (a *Account) addStreamWithAssignment(config *StreamConfig, fsConfig *FileStoreConfig, sa *streamAssignment) (*stream, error) {
	s, jsa, err := a.checkForJetStream()
	if err != nil {
		return nil, err
	}

	// If we do not have the stream currently assigned to us in cluster mode we will proceed but warn.
	// This can happen on startup with restored state where on meta replay we still do not have
	// the assignment. Running in single server mode this always returns true.
	if !jsa.streamAssigned(config.Name) {
		s.Debugf("Stream '%s > %s' does not seem to be assigned to this server", a.Name, config.Name)
	}

	// Sensible defaults.
	cfg, apiErr := s.checkStreamCfg(config, a)
	if apiErr != nil {
		return nil, apiErr
	}

	singleServerMode := !s.JetStreamIsClustered() && s.standAloneMode()
	if singleServerMode && cfg.Replicas > 1 {
		return nil, ApiErrors[JSStreamReplicasNotSupportedErr]
	}

	// Make sure we are ok when these are done in parallel.
	// We used to call Add(1) in the "else" clause of the "if loaded"
	// statement. This caused a data race because it was possible
	// that one go routine stores (with count==0) and another routine
	// gets "loaded==true" and calls wg.Wait() while the other routine
	// then calls wg.Add(1). It also could mean that two routines execute
	// the rest of the code concurrently.
	swg := &sync.WaitGroup{}
	swg.Add(1)
	v, loaded := jsa.inflight.LoadOrStore(cfg.Name, swg)
	wg := v.(*sync.WaitGroup)
	if loaded {
		wg.Wait()
		// This waitgroup is "thrown away" (since there was an existing one).
		swg.Done()
	} else {
		defer func() {
			jsa.inflight.Delete(cfg.Name)
			wg.Done()
		}()
	}

	js, isClustered := jsa.jetStreamAndClustered()
	jsa.mu.Lock()
	if mset, ok := jsa.streams[cfg.Name]; ok {
		jsa.mu.Unlock()
		// Check to see if configs are same.
		ocfg := mset.config()

		// Set the index name on cfg, since it would otherwise not contain a value
		// for iname while the config returned from mset.config() does; this keeps
		// the DeepEqual comparison below meaningful.
		for _, s := range cfg.Sources {
			s.setIndexName()
		}

		if reflect.DeepEqual(ocfg, cfg) {
			if sa != nil {
				mset.setStreamAssignment(sa)
			}
			return mset, nil
		} else {
			return nil, ApiErrors[JSStreamNameExistErr]
		}
	}
	jsa.usageMu.RLock()
	selected, tier, hasTier := jsa.selectLimits(&cfg)
	jsa.usageMu.RUnlock()
	reserved := int64(0)
	if !isClustered {
		reserved = jsa.tieredReservation(tier, &cfg)
	}
	jsa.mu.Unlock()

	if !hasTier {
		return nil, NewJSNoLimitsError()
	}
	js.mu.RLock()
	if isClustered {
		_, reserved = tieredStreamAndReservationCount(js.cluster.streams[a.Name], tier, &cfg)
	}
	if err := js.checkAllLimits(&selected, &cfg, reserved, 0); err != nil {
		js.mu.RUnlock()
		return nil, err
	}
	js.mu.RUnlock()
	jsa.mu.Lock()
	// Check for template ownership if present.
	if cfg.Template != _EMPTY_ && jsa.account != nil {
		if !jsa.checkTemplateOwnership(cfg.Template, cfg.Name) {
			jsa.mu.Unlock()
			return nil, fmt.Errorf("stream not owned by template")
		}
	}

	// If mirror, check if the transforms (if any) are valid.
	if cfg.Mirror != nil {
		if len(cfg.Mirror.SubjectTransforms) == 0 {
			if cfg.Mirror.FilterSubject != _EMPTY_ && !IsValidSubject(cfg.Mirror.FilterSubject) {
				jsa.mu.Unlock()
				return nil, fmt.Errorf("subject filter '%s' for the mirror: %w", cfg.Mirror.FilterSubject, ErrBadSubject)
			}
		} else {
			for _, st := range cfg.Mirror.SubjectTransforms {
				if st.Source != _EMPTY_ && !IsValidSubject(st.Source) {
					jsa.mu.Unlock()
					return nil, fmt.Errorf("invalid subject transform source '%s' for the mirror: %w", st.Source, ErrBadSubject)
				}
				// Check the transform, if any, is valid.
				if st.Destination != _EMPTY_ {
					if _, err = NewSubjectTransform(st.Source, st.Destination); err != nil {
						jsa.mu.Unlock()
						return nil, fmt.Errorf("subject transform from '%s' to '%s' for the mirror: %w", st.Source, st.Destination, err)
					}
				}
			}
		}
	}

	// Setup our internal indexed names here for sources and check if the transforms (if any) are valid.
	for _, ssi := range cfg.Sources {
		if len(ssi.SubjectTransforms) == 0 {
			// Check the filter, if any, is valid.
			if ssi.FilterSubject != _EMPTY_ && !IsValidSubject(ssi.FilterSubject) {
				jsa.mu.Unlock()
				return nil, fmt.Errorf("subject filter '%s' for the source: %w", ssi.FilterSubject, ErrBadSubject)
			}
		} else {
			for _, st := range ssi.SubjectTransforms {
				if st.Source != _EMPTY_ && !IsValidSubject(st.Source) {
					jsa.mu.Unlock()
					return nil, fmt.Errorf("invalid subject transform source '%s' for the source: %w", st.Source, ErrBadSubject)
				}
				// Check the transform, if any, is valid.
				if st.Destination != _EMPTY_ {
					if _, err = NewSubjectTransform(st.Source, st.Destination); err != nil {
						jsa.mu.Unlock()
						return nil, fmt.Errorf("subject transform from '%s' to '%s' for the source: %w", st.Source, st.Destination, err)
					}
				}
			}
		}
	}

	// Check for overlapping subjects with other streams.
	// These are not allowed for now.
	if jsa.subjectsOverlap(cfg.Subjects, nil) {
		jsa.mu.Unlock()
		return nil, NewJSStreamSubjectOverlapError()
	}

	if !hasTier {
		jsa.mu.Unlock()
		return nil, fmt.Errorf("no applicable tier found")
	}

	// Setup the internal clients.
	c := s.createInternalJetStreamClient()
	ic := s.createInternalJetStreamClient()

	qpfx := fmt.Sprintf("[ACC:%s] stream '%s' ", a.Name, config.Name)
	mset := &stream{
		acc:       a,
		jsa:       jsa,
		cfg:       cfg,
		js:        js,
		srv:       s,
		client:    c,
		sysc:      ic,
		tier:      tier,
		stype:     cfg.Storage,
		consumers: make(map[string]*consumer),
		msgs:      newIPQueue[*inMsg](s, qpfx+"messages"),
		gets:      newIPQueue[*directGetReq](s, qpfx+"direct gets"),
		qch:       make(chan struct{}),
		mqch:      make(chan struct{}),
		uch:       make(chan struct{}, 4),
		sch:       make(chan struct{}, 1),
	}

	// Start our signaling routine to process consumers.
	mset.sigq = newIPQueue[*cMsg](s, qpfx+"obs") // of *cMsg
	go mset.signalConsumersLoop()

	// For processing acks when we have a non-limits (e.g. interest) retention policy.
	if cfg.Retention != LimitsPolicy {
		mset.ackq = newIPQueue[uint64](s, qpfx+"acks")
	}

	// Check for input subject transform.
	if cfg.SubjectTransform != nil {
		tr, err := NewSubjectTransform(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination)
		if err != nil {
			jsa.mu.Unlock()
			return nil, fmt.Errorf("stream subject transform from '%s' to '%s': %w", cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination, err)
		}
		mset.itr = tr
	}

	// Check for RePublish.
	if cfg.RePublish != nil {
		tr, err := NewSubjectTransform(cfg.RePublish.Source, cfg.RePublish.Destination)
		if err != nil {
			jsa.mu.Unlock()
			return nil, fmt.Errorf("stream republish transform from '%s' to '%s': %w", cfg.RePublish.Source, cfg.RePublish.Destination, err)
		}
		// Assign our transform for republishing.
		mset.tr = tr
	}
	storeDir := filepath.Join(jsa.storeDir, streamsDir, cfg.Name)
	jsa.mu.Unlock()

	// Bind to the user account.
	c.registerWithAccount(a)
	// Bind to the system account.
	ic.registerWithAccount(s.SystemAccount())

	// Create the appropriate storage.
	fsCfg := fsConfig
	if fsCfg == nil {
		fsCfg = &FileStoreConfig{}
		// If we are file based and not explicitly configured
		// we may be able to auto-tune based on max msgs or bytes.
		if cfg.Storage == FileStorage {
			mset.autoTuneFileStorageBlockSize(fsCfg)
		}
	}
	fsCfg.StoreDir = storeDir
	fsCfg.AsyncFlush = false
	// Grab configured sync interval.
	fsCfg.SyncInterval = s.getOpts().SyncInterval
	fsCfg.SyncAlways = s.getOpts().SyncAlways
	fsCfg.Compression = config.Compression

	if err := mset.setupStore(fsCfg); err != nil {
		mset.stop(true, false)
		return nil, NewJSStreamStoreFailedError(err)
	}

	// Create our pubAck template here. Better than json marshal each time on success.
	if domain := s.getOpts().JetStreamDomain; domain != _EMPTY_ {
		mset.pubAck = []byte(fmt.Sprintf("{%q:%q, %q:%q, %q:", "stream", cfg.Name, "domain", domain, "seq"))
	} else {
		mset.pubAck = []byte(fmt.Sprintf("{%q:%q, %q:", "stream", cfg.Name, "seq"))
	}
	end := len(mset.pubAck)
	mset.pubAck = mset.pubAck[:end:end]

	// Set our known last sequence.
	var state StreamState
	mset.store.FastState(&state)

	// Possible race with consumer.setLeader during recovery.
	mset.mu.RLock()
	mset.lseq = state.LastSeq
	mset.mu.RUnlock()

	// If no msgs (new stream), set dedupe state loaded to true.
	if state.Msgs == 0 {
		mset.ddloaded = true
	}

	// Set our stream assignment if in clustered mode.
	if sa != nil {
		mset.setStreamAssignment(sa)
	}

	// Setup our internal send go routine.
	mset.setupSendCapabilities()

	// Reserve resources if MaxBytes present.
	mset.js.reserveStreamResources(&mset.cfg)

	// Call directly to set leader if not in clustered mode.
	// This can be called before we actually set up clustering, though, so check both.
	if singleServerMode {
		if err := mset.setLeader(true); err != nil {
			mset.stop(true, false)
			return nil, err
		}
	}

	// This is always true in single server mode.
	if mset.IsLeader() {
		// Send advisory.
		var suppress bool
		if !s.standAloneMode() && sa == nil {
			if cfg.Replicas > 1 {
				suppress = true
			}
		} else if sa != nil {
			suppress = sa.responded
		}
		if !suppress {
			mset.sendCreateAdvisory()
		}
	}

	// Register with our account last.
	jsa.mu.Lock()
	jsa.streams[cfg.Name] = mset
	jsa.mu.Unlock()

	return mset, nil
}

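// Illustrative note (assumed shape, hand-derived from the template built in
// addStreamWithAssignment above): for a stream "ORDERS" in JetStream domain
// "hub", a successful publish ack would serialize roughly as
//
//	{"stream":"ORDERS", "domain":"hub", "seq":42}
//
// with the sequence number and closing brace appended at publish time.
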
// Composes the index name. It contains the stream name, subject filter, and
// transform destination. When the stream is external we add additional
// information in case the (external) stream names are the same.
func (ssi *StreamSource) composeIName() string {
	var iName = ssi.Name

	if ssi.External != nil {
		iName = iName + ":" + getHash(ssi.External.ApiPrefix)
	}

	source := ssi.FilterSubject
	destination := fwcs

	if len(ssi.SubjectTransforms) == 0 {
		// Normalize filter and destination in case they are empty.
		if source == _EMPTY_ {
			source = fwcs
		}
		if destination == _EMPTY_ {
			destination = fwcs
		}
	} else {
		var sources, destinations []string

		for _, tr := range ssi.SubjectTransforms {
			trsrc, trdest := tr.Source, tr.Destination
			if trsrc == _EMPTY_ {
				trsrc = fwcs
			}
			if trdest == _EMPTY_ {
				trdest = fwcs
			}
			sources = append(sources, trsrc)
			destinations = append(destinations, trdest)
		}
		source = strings.Join(sources, "\f")
		destination = strings.Join(destinations, "\f")
	}

	return strings.Join([]string{iName, source, destination}, " ")
}

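// Illustrative examples (hand-computed, assuming fwcs is the full wildcard ">"):
//
//	(&StreamSource{Name: "ORDERS"}).composeIName()
//	    // => "ORDERS > >" (empty filter and destination normalize to ">")
//	(&StreamSource{Name: "ORDERS", FilterSubject: "orders.new"}).composeIName()
//	    // => "ORDERS orders.new >"
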
// Sets the index name.
func (ssi *StreamSource) setIndexName() {
	ssi.iname = ssi.composeIName()
}

func (mset *stream) streamAssignment() *streamAssignment {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.sa
}

func (mset *stream) setStreamAssignment(sa *streamAssignment) {
	var node RaftNode
	var peers []string

	mset.mu.RLock()
	js := mset.js
	mset.mu.RUnlock()

	if js != nil {
		js.mu.RLock()
		if sa.Group != nil {
			node = sa.Group.node
			peers = sa.Group.Peers
		}
		js.mu.RUnlock()
	}

	mset.mu.Lock()
	defer mset.mu.Unlock()

	mset.sa = sa
	if sa == nil {
		return
	}

	// Set our node.
	mset.node = node
	if mset.node != nil {
		mset.node.UpdateKnownPeers(peers)
	}

	// Setup our info sub here as well for all stream members. This is now by design.
	if mset.infoSub == nil {
		isubj := fmt.Sprintf(clusterStreamInfoT, mset.jsa.acc(), mset.cfg.Name)
		// Note below the way we subscribe here is so that we can send requests to ourselves.
		mset.infoSub, _ = mset.srv.systemSubscribe(isubj, _EMPTY_, false, mset.sysc, mset.handleClusterStreamInfoRequest)
	}

	// Trigger update chan.
	select {
	case mset.uch <- struct{}{}:
	default:
	}
}

func (mset *stream) monitorQuitC() <-chan struct{} {
	if mset == nil {
		return nil
	}
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.mqch
}

func (mset *stream) updateC() <-chan struct{} {
	if mset == nil {
		return nil
	}
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.uch
}

// IsLeader will return if we are the current leader.
func (mset *stream) IsLeader() bool {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.isLeader()
}

// Lock should be held.
func (mset *stream) isLeader() bool {
	if mset.isClustered() {
		return mset.node.Leader()
	}
	return true
}

// TODO(dlc) - Check to see if we can accept being the leader or we should step down.
func (mset *stream) setLeader(isLeader bool) error {
	mset.mu.Lock()
	// If we are here we have a change in leader status.
	if isLeader {
		// Make sure we are listening for sync requests.
		// TODO(dlc) - Original design was that all in sync members of the group would do DQ.
		mset.startClusterSubs()
		// Setup subscriptions.
		if err := mset.subscribeToStream(); err != nil {
			mset.mu.Unlock()
			return err
		}
	} else {
		// Cancel the timer to create the source consumers if it has not fired yet.
		if mset.sourcesConsumerSetup != nil {
			mset.sourcesConsumerSetup.Stop()
			mset.sourcesConsumerSetup = nil
		}
		// Stop responding to sync requests.
		mset.stopClusterSubs()
		// Unsubscribe from direct stream.
		mset.unsubscribeToStream(false)
		// Clear catchup state.
		mset.clearAllCatchupPeers()
	}
	// Track group leader.
	if mset.isClustered() {
		mset.leader = mset.node.GroupLeader()
	} else {
		mset.leader = _EMPTY_
	}
	mset.mu.Unlock()
	return nil
}

// Lock should be held.
func (mset *stream) startClusterSubs() {
	if mset.isClustered() && mset.syncSub == nil {
		mset.syncSub, _ = mset.srv.systemSubscribe(mset.sa.Sync, _EMPTY_, false, mset.sysc, mset.handleClusterSyncRequest)
	}
}

// Lock should be held.
func (mset *stream) stopClusterSubs() {
	if mset.syncSub != nil {
		mset.srv.sysUnsubscribe(mset.syncSub)
		mset.syncSub = nil
	}
}

// account gets the account for this stream.
func (mset *stream) account() *Account {
	mset.mu.RLock()
	jsa := mset.jsa
	mset.mu.RUnlock()
	if jsa == nil {
		return nil
	}
	return jsa.acc()
}

// Helper to determine the max msg size for this stream if file based.
func (mset *stream) maxMsgSize() uint64 {
	maxMsgSize := mset.cfg.MaxMsgSize
	if maxMsgSize <= 0 {
		// Pull from the account.
		if mset.jsa != nil {
			if acc := mset.jsa.acc(); acc != nil {
				acc.mu.RLock()
				maxMsgSize = acc.mpay
				acc.mu.RUnlock()
			}
		}
		// If all else fails use default.
		if maxMsgSize <= 0 {
			maxMsgSize = MAX_PAYLOAD_SIZE
		}
	}
	// Now estimate the longest literal subject we could store.
	maxSubject := -1
	for _, subj := range mset.cfg.Subjects {
		if subjectIsLiteral(subj) {
			if len(subj) > maxSubject {
				maxSubject = len(subj)
			}
		}
	}
	if maxSubject < 0 {
		const defaultMaxSubject = 256
		maxSubject = defaultMaxSubject
	}
	// filestore will add in estimates for record headers, etc.
	return fileStoreMsgSizeEstimate(maxSubject, int(maxMsgSize))
}

// If we are file based and the file storage config was not explicitly set
// we can autotune block sizes to better match. Our target will be to store 125%
// of the theoretical limit. We will round up to nearest 100 bytes as well.
func (mset *stream) autoTuneFileStorageBlockSize(fsCfg *FileStoreConfig) {
	var totalEstSize uint64

	// MaxBytes will take precedence for now.
	if mset.cfg.MaxBytes > 0 {
		totalEstSize = uint64(mset.cfg.MaxBytes)
	} else if mset.cfg.MaxMsgs > 0 {
		// Determine max message size to estimate.
		totalEstSize = mset.maxMsgSize() * uint64(mset.cfg.MaxMsgs)
	} else if mset.cfg.MaxMsgsPer > 0 {
		fsCfg.BlockSize = uint64(defaultKVBlockSize)
		return
	} else {
		// If nothing set will let underlying filestore determine blkSize.
		return
	}

	blkSize := (totalEstSize / 4) + 1 // (25% overhead)
	// Round up to nearest 100.
	if m := blkSize % 100; m != 0 {
		blkSize += 100 - m
	}
	if blkSize <= FileStoreMinBlkSize {
		blkSize = FileStoreMinBlkSize
	} else if blkSize >= FileStoreMaxBlkSize {
		blkSize = FileStoreMaxBlkSize
	} else {
		blkSize = defaultMediumBlockSize
	}
	fsCfg.BlockSize = uint64(blkSize)
}

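// Worked example (hand-computed from the logic above): with MaxBytes = 1GiB,
// totalEstSize = 1<<30, so blkSize = (1<<30)/4 + 1 = 268435457, rounded up to
// 268435500. Any estimate that lands strictly between FileStoreMinBlkSize and
// FileStoreMaxBlkSize is then pinned to defaultMediumBlockSize; the estimate
// effectively only selects between the minimum, medium, and maximum block sizes.
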
// rebuildDedupe will rebuild any dedupe structures needed after recovery of a stream.
// Will be called lazily to avoid penalizing startup times.
// TODO(dlc) - Might be good to know if this should be checked at all for streams with no
// headers and msgId in them. Would need signaling from the storage layer.
// Lock should be held.
func (mset *stream) rebuildDedupe() {
	if mset.ddloaded {
		return
	}

	mset.ddloaded = true

	// We have some messages. Lookup starting sequence by duplicate time window.
	sseq := mset.store.GetSeqFromTime(time.Now().Add(-mset.cfg.Duplicates))
	if sseq == 0 {
		return
	}

	var smv StoreMsg
	var state StreamState
	mset.store.FastState(&state)

	for seq := sseq; seq <= state.LastSeq; seq++ {
		sm, err := mset.store.LoadMsg(seq, &smv)
		if err != nil {
			continue
		}
		var msgId string
		if len(sm.hdr) > 0 {
			if msgId = getMsgId(sm.hdr); msgId != _EMPTY_ {
				mset.storeMsgIdLocked(&ddentry{msgId, sm.seq, sm.ts})
			}
		}
		if seq == state.LastSeq {
			mset.lmsgId = msgId
		}
	}
}

func (mset *stream) lastSeqAndCLFS() (uint64, uint64) {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.lseq, mset.getCLFS()
}

func (mset *stream) getCLFS() uint64 {
	mset.clMu.Lock()
	defer mset.clMu.Unlock()
	return mset.clfs
}

func (mset *stream) setCLFS(clfs uint64) {
	mset.clMu.Lock()
	mset.clfs = clfs
	mset.clMu.Unlock()
}

func (mset *stream) lastSeq() uint64 {
	mset.mu.RLock()
	lseq := mset.lseq
	mset.mu.RUnlock()
	return lseq
}

func (mset *stream) setLastSeq(lseq uint64) {
	mset.mu.Lock()
	mset.lseq = lseq
	mset.mu.Unlock()
}

func (mset *stream) sendCreateAdvisory() {
	mset.mu.RLock()
	name := mset.cfg.Name
	template := mset.cfg.Template
	outq := mset.outq
	srv := mset.srv
	mset.mu.RUnlock()

	if outq == nil {
		return
	}

	// Finally send an event that this stream was created.
	m := JSStreamActionAdvisory{
		TypedEvent: TypedEvent{
			Type: JSStreamActionAdvisoryType,
			ID:   nuid.Next(),
			Time: time.Now().UTC(),
		},
		Stream:   name,
		Action:   CreateEvent,
		Template: template,
		Domain:   srv.getOpts().JetStreamDomain,
	}

	j, err := json.Marshal(m)
	if err != nil {
		return
	}

	subj := JSAdvisoryStreamCreatedPre + "." + name
	outq.sendMsg(subj, j)
}

func (mset *stream) sendDeleteAdvisoryLocked() {
	if mset.outq == nil {
		return
	}

	m := JSStreamActionAdvisory{
		TypedEvent: TypedEvent{
			Type: JSStreamActionAdvisoryType,
			ID:   nuid.Next(),
			Time: time.Now().UTC(),
		},
		Stream:   mset.cfg.Name,
		Action:   DeleteEvent,
		Template: mset.cfg.Template,
		Domain:   mset.srv.getOpts().JetStreamDomain,
	}

	j, err := json.Marshal(m)
	if err == nil {
		subj := JSAdvisoryStreamDeletedPre + "." + mset.cfg.Name
		mset.outq.sendMsg(subj, j)
	}
}

func (mset *stream) sendUpdateAdvisoryLocked() {
	if mset.outq == nil {
		return
	}

	m := JSStreamActionAdvisory{
		TypedEvent: TypedEvent{
			Type: JSStreamActionAdvisoryType,
			ID:   nuid.Next(),
			Time: time.Now().UTC(),
		},
		Stream: mset.cfg.Name,
		Action: ModifyEvent,
		Domain: mset.srv.getOpts().JetStreamDomain,
	}

	j, err := json.Marshal(m)
	if err == nil {
		subj := JSAdvisoryStreamUpdatedPre + "." + mset.cfg.Name
		mset.outq.sendMsg(subj, j)
	}
}

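// Illustrative sketch (assumed values): the create advisory above goes out on
// JSAdvisoryStreamCreatedPre + ".<stream>" with a JSON payload along the lines of
//
//	{"type":"io.nats.jetstream.advisory.v1.stream_action","id":"<nuid>",
//	 "timestamp":"2024-01-01T00:00:00Z","stream":"ORDERS","action":"create"}
//
// where the exact field names follow the JSStreamActionAdvisory definition.
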
// Created returns created time.
func (mset *stream) createdTime() time.Time {
	mset.mu.RLock()
	created := mset.created
	mset.mu.RUnlock()
	return created
}

// Internal to allow creation time to be restored.
func (mset *stream) setCreatedTime(created time.Time) {
	mset.mu.Lock()
	mset.created = created
	mset.mu.Unlock()
}

// subjectsOverlap checks to see if these subjects overlap with existing subjects.
// Use only for non-clustered JetStream.
// RLock minimum should be held.
func (jsa *jsAccount) subjectsOverlap(subjects []string, self *stream) bool {
	for _, mset := range jsa.streams {
		if self != nil && mset == self {
			continue
		}
		for _, subj := range mset.cfg.Subjects {
			for _, tsubj := range subjects {
				if SubjectsCollide(tsubj, subj) {
					return true
				}
			}
		}
	}
	return false
}

// StreamDefaultDuplicatesWindow default duplicates window.
const StreamDefaultDuplicatesWindow = 2 * time.Minute

func (s *Server) checkStreamCfg(config *StreamConfig, acc *Account) (StreamConfig, *ApiError) {
	lim := &s.getOpts().JetStreamLimits

	if config == nil {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration invalid"))
	}
	if !isValidName(config.Name) {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream name is required and can not contain '.', '*', '>'"))
	}
	if len(config.Name) > JSMaxNameLen {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream name is too long, maximum allowed is %d", JSMaxNameLen))
	}
	if len(config.Description) > JSMaxDescriptionLen {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream description is too long, maximum allowed is %d", JSMaxDescriptionLen))
	}

	var metadataLen int
	for k, v := range config.Metadata {
		metadataLen += len(k) + len(v)
	}
	if metadataLen > JSMaxMetadataLen {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream metadata exceeds maximum size of %d bytes", JSMaxMetadataLen))
	}

	cfg := *config

	// Make file the default.
	if cfg.Storage == 0 {
		cfg.Storage = FileStorage
	}
	if cfg.Replicas == 0 {
		cfg.Replicas = 1
	}
	if cfg.Replicas > StreamMaxReplicas {
		return cfg, NewJSStreamInvalidConfigError(fmt.Errorf("maximum replicas is %d", StreamMaxReplicas))
	}
	if cfg.Replicas < 0 {
		return cfg, NewJSReplicasCountCannotBeNegativeError()
	}
	if cfg.MaxMsgs == 0 {
		cfg.MaxMsgs = -1
	}
	if cfg.MaxMsgsPer == 0 {
		cfg.MaxMsgsPer = -1
	}
	if cfg.MaxBytes == 0 {
		cfg.MaxBytes = -1
	}
	if cfg.MaxMsgSize == 0 {
		cfg.MaxMsgSize = -1
	}
	if cfg.MaxConsumers == 0 {
		cfg.MaxConsumers = -1
	}
	if cfg.Duplicates == 0 && cfg.Mirror == nil {
		maxWindow := StreamDefaultDuplicatesWindow
		if lim.Duplicates > 0 && maxWindow > lim.Duplicates {
			maxWindow = lim.Duplicates
		}
		if cfg.MaxAge != 0 && cfg.MaxAge < maxWindow {
			cfg.Duplicates = cfg.MaxAge
		} else {
			cfg.Duplicates = maxWindow
		}
	}
	if cfg.MaxAge > 0 && cfg.MaxAge < 100*time.Millisecond {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("max age needs to be >= 100ms"))
	}
	if cfg.Duplicates < 0 {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window can not be negative"))
	}
	// Check that duplicates is not larger than age if set.
	if cfg.MaxAge != 0 && cfg.Duplicates > cfg.MaxAge {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window can not be larger than max age"))
	}
	if lim.Duplicates > 0 && cfg.Duplicates > lim.Duplicates {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window can not be larger than server limit of %v",
			lim.Duplicates.String()))
	}
	if cfg.Duplicates > 0 && cfg.Duplicates < 100*time.Millisecond {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicates window needs to be >= 100ms"))
	}

	if cfg.DenyPurge && cfg.AllowRollup {
		return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("roll-ups require the purge permission"))
	}

	// Check for discard new per subject; we require the discard policy to also be new.
	if cfg.DiscardNewPer {
		if cfg.Discard != DiscardNew {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires discard new policy to be set"))
		}
		if cfg.MaxMsgsPer <= 0 {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires max msgs per subject > 0"))
		}
	}

	getStream := func(streamName string) (bool, StreamConfig) {
		var exists bool
		var cfg StreamConfig
		if s.JetStreamIsClustered() {
			if js, _ := s.getJetStreamCluster(); js != nil {
				js.mu.RLock()
				if sa := js.streamAssignment(acc.Name, streamName); sa != nil {
					cfg = *sa.Config
					exists = true
				}
				js.mu.RUnlock()
			}
		} else if mset, err := acc.lookupStream(streamName); err == nil {
			cfg = mset.cfg
			exists = true
		}
		return exists, cfg
	}

	hasStream := func(streamName string) (bool, int32, []string) {
		exists, cfg := getStream(streamName)
		return exists, cfg.MaxMsgSize, cfg.Subjects
	}

	var streamSubs []string
	var deliveryPrefixes []string
	var apiPrefixes []string

	// Do some pre-checking for mirror config to avoid cycles in clustered mode.
	if cfg.Mirror != nil {
		if cfg.FirstSeq > 0 {
			return StreamConfig{}, NewJSMirrorWithFirstSeqError()
		}
		if len(cfg.Subjects) > 0 {
			return StreamConfig{}, NewJSMirrorWithSubjectsError()
		}
		if len(cfg.Sources) > 0 {
			return StreamConfig{}, NewJSMirrorWithSourcesError()
		}
		if cfg.Mirror.FilterSubject != _EMPTY_ && len(cfg.Mirror.SubjectTransforms) != 0 {
			return StreamConfig{}, NewJSMirrorMultipleFiltersNotAllowedError()
		}
		// Check subject filters overlap.
		for outer, tr := range cfg.Mirror.SubjectTransforms {
			if !IsValidSubject(tr.Source) {
				return StreamConfig{}, NewJSMirrorInvalidSubjectFilterError()
			}
			for inner, innertr := range cfg.Mirror.SubjectTransforms {
				if inner != outer && subjectIsSubsetMatch(tr.Source, innertr.Source) {
					return StreamConfig{}, NewJSMirrorOverlappingSubjectFiltersError()
				}
			}
		}
		// Do not perform checks if External is provided, as it could lead to
		// checking against itself (if the sourced stream name is the same on a different JetStream).
		if cfg.Mirror.External == nil {
			if !isValidName(cfg.Mirror.Name) {
				return StreamConfig{}, NewJSMirrorInvalidStreamNameError()
			}
			// We do not require the other stream to exist anymore, but if we can see it check payloads.
			exists, maxMsgSize, subs := hasStream(cfg.Mirror.Name)
			if len(subs) > 0 {
				streamSubs = append(streamSubs, subs...)
			}
			if exists {
				if cfg.MaxMsgSize > 0 && maxMsgSize > 0 && cfg.MaxMsgSize < maxMsgSize {
					return StreamConfig{}, NewJSMirrorMaxMessageSizeTooBigError()
				}
			}
			// Determine if we are inheriting direct gets.
			if exists, ocfg := getStream(cfg.Mirror.Name); exists {
				cfg.MirrorDirect = ocfg.AllowDirect
			} else if js := s.getJetStream(); js != nil && js.isClustered() {
				// Could not find it here. If we are clustered we can look it up.
				js.mu.RLock()
				if cc := js.cluster; cc != nil {
					if as := cc.streams[acc.Name]; as != nil {
						if sa := as[cfg.Mirror.Name]; sa != nil {
							cfg.MirrorDirect = sa.Config.AllowDirect
						}
					}
				}
				js.mu.RUnlock()
			}
		} else {
			if cfg.Mirror.External.DeliverPrefix != _EMPTY_ {
				deliveryPrefixes = append(deliveryPrefixes, cfg.Mirror.External.DeliverPrefix)
			}
			if cfg.Mirror.External.ApiPrefix != _EMPTY_ {
				apiPrefixes = append(apiPrefixes, cfg.Mirror.External.ApiPrefix)
			}
		}
	}

	// Check for duplicate sources.
	var iNames = make(map[string]struct{})
	for _, src := range cfg.Sources {
		if !isValidName(src.Name) {
			return StreamConfig{}, NewJSSourceInvalidStreamNameError()
		}
		if _, ok := iNames[src.composeIName()]; !ok {
			iNames[src.composeIName()] = struct{}{}
		} else {
			return StreamConfig{}, NewJSSourceDuplicateDetectedError()
		}
		// Do not perform checks if External is provided, as it could lead to
		// checking against itself (if the sourced stream name is the same on a different JetStream).
		if src.External == nil {
			exists, maxMsgSize, subs := hasStream(src.Name)
			if len(subs) > 0 {
				streamSubs = append(streamSubs, subs...)
			}
			if exists {
				if cfg.MaxMsgSize > 0 && maxMsgSize > 0 && cfg.MaxMsgSize < maxMsgSize {
					return StreamConfig{}, NewJSSourceMaxMessageSizeTooBigError()
				}
			}

			if src.FilterSubject != _EMPTY_ && len(src.SubjectTransforms) != 0 {
				return StreamConfig{}, NewJSSourceMultipleFiltersNotAllowedError()
			}

			for _, tr := range src.SubjectTransforms {
				err := ValidateMappingDestination(tr.Destination)
				if err != nil {
					return StreamConfig{}, NewJSSourceInvalidTransformDestinationError()
				}
			}

			// Check subject filters overlap.
			for outer, tr := range src.SubjectTransforms {
				if !IsValidSubject(tr.Source) {
					return StreamConfig{}, NewJSSourceInvalidSubjectFilterError()
				}
				for inner, innertr := range src.SubjectTransforms {
					if inner != outer && subjectIsSubsetMatch(tr.Source, innertr.Source) {
						return StreamConfig{}, NewJSSourceOverlappingSubjectFiltersError()
					}
				}
			}
			continue
		} else {
			if src.External.DeliverPrefix != _EMPTY_ {
				deliveryPrefixes = append(deliveryPrefixes, src.External.DeliverPrefix)
			}
			if src.External.ApiPrefix != _EMPTY_ {
				apiPrefixes = append(apiPrefixes, src.External.ApiPrefix)
			}
		}
	}

	// Check prefix overlap with subjects.
	for _, pfx := range deliveryPrefixes {
		if !IsValidPublishSubject(pfx) {
			return StreamConfig{}, NewJSStreamInvalidExternalDeliverySubjError(pfx)
		}
		for _, sub := range streamSubs {
			if SubjectsCollide(sub, fmt.Sprintf("%s.%s", pfx, sub)) {
				return StreamConfig{}, NewJSStreamExternalDelPrefixOverlapsError(pfx, sub)
			}
		}
	}
	// Check if api prefixes overlap.
	for _, apiPfx := range apiPrefixes {
		if !IsValidPublishSubject(apiPfx) {
			return StreamConfig{}, NewJSStreamInvalidConfigError(
				fmt.Errorf("stream external api prefix %q must be a valid subject without wildcards", apiPfx))
		}
		if SubjectsCollide(apiPfx, JSApiPrefix) {
			return StreamConfig{}, NewJSStreamExternalApiOverlapError(apiPfx, JSApiPrefix)
		}
	}

	// Cycle check for sources.
	toVisit := []*StreamConfig{&cfg}
	visited := make(map[string]struct{})
	overlaps := func(subjects []string, filter string) bool {
		if filter == _EMPTY_ {
			return true
		}
		for _, subject := range subjects {
			if SubjectsCollide(subject, filter) {
				return true
			}
		}
		return false
	}

	for len(toVisit) > 0 {
		cfg := toVisit[0]
		toVisit = toVisit[1:]
		visited[cfg.Name] = struct{}{}
		for _, src := range cfg.Sources {
			if src.External != nil {
				continue
			}
			// We can detect a cycle between streams, but let's double check that the
			// subjects actually form a cycle.
			if _, ok := visited[src.Name]; ok {
				if overlaps(cfg.Subjects, src.FilterSubject) {
					return StreamConfig{}, NewJSStreamInvalidConfigError(errors.New("detected cycle"))
				}
			} else if exists, cfg := getStream(src.Name); exists {
				toVisit = append(toVisit, &cfg)
			}
		}
		// Avoid cycles hiding behind mirrors.
		if m := cfg.Mirror; m != nil {
			if m.External == nil {
				if _, ok := visited[m.Name]; ok {
					return StreamConfig{}, NewJSStreamInvalidConfigError(errors.New("detected cycle"))
				}
				if exists, cfg := getStream(m.Name); exists {
					toVisit = append(toVisit, &cfg)
				}
			}
		}
	}

	if len(cfg.Subjects) == 0 {
		if cfg.Mirror == nil && len(cfg.Sources) == 0 {
			cfg.Subjects = append(cfg.Subjects, cfg.Name)
		}
	} else {
		if cfg.Mirror != nil {
			return StreamConfig{}, NewJSMirrorWithSubjectsError()
		}

		// Check for literal duplication of subject interest in config
		// and no overlap with any JS API subject space.
		dset := make(map[string]struct{}, len(cfg.Subjects))
		for _, subj := range cfg.Subjects {
			if _, ok := dset[subj]; ok {
				return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("duplicate subjects detected"))
			}
			// Also check to make sure we do not overlap with our $JS API subjects.
			if subjectIsSubsetMatch(subj, "$JS.API.>") {
				return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subjects overlap with jetstream api"))
			}
			// Make sure the subject is valid.
			if !IsValidSubject(subj) {
				return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("invalid subject"))
			}
			// Mark for duplicate check.
			dset[subj] = struct{}{}
		}
	}

	if len(cfg.Subjects) == 0 && len(cfg.Sources) == 0 && cfg.Mirror == nil {
		return StreamConfig{}, NewJSStreamInvalidConfigError(
			fmt.Errorf("stream needs at least one configured subject or be a source/mirror"))
	}

	// Check for MaxBytes required and its limit.
	if required, limit := acc.maxBytesLimits(&cfg); required && cfg.MaxBytes <= 0 {
		return StreamConfig{}, NewJSStreamMaxBytesRequiredError()
	} else if limit > 0 && cfg.MaxBytes > limit {
		return StreamConfig{}, NewJSStreamMaxStreamBytesExceededError()
	}

	// Now check that if we have multiple subjects they do not overlap with each
	// other, which would cause duplicate entries (assuming no MsgID).
	if len(cfg.Subjects) > 1 {
		for _, subj := range cfg.Subjects {
			for _, tsubj := range cfg.Subjects {
				if tsubj != subj && SubjectsCollide(tsubj, subj) {
					return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("subject %q overlaps with %q", subj, tsubj))
				}
			}
		}
	}

	// If we have a republish directive check if we can create a transform here.
	if cfg.RePublish != nil {
		// Check to make sure source is a valid subset of the subjects we have.
		// Also make sure it does not form a cycle.
		// Empty same as all.
		if cfg.RePublish.Source == _EMPTY_ {
			cfg.RePublish.Source = fwcs
		}
		var formsCycle bool
		for _, subj := range cfg.Subjects {
			if SubjectsCollide(cfg.RePublish.Destination, subj) {
				formsCycle = true
				break
			}
		}
		if formsCycle {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration for republish destination forms a cycle"))
		}
		if _, err := NewSubjectTransform(cfg.RePublish.Source, cfg.RePublish.Destination); err != nil {
			return StreamConfig{}, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration for republish with transform from '%s' to '%s' not valid", cfg.RePublish.Source, cfg.RePublish.Destination))
		}
	}

	return cfg, nil
}

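// Illustrative sketch (hypothetical input/output): checkStreamCfg fills in
// defaults, e.g. zero-valued limits become unlimited (-1) and the duplicate
// window defaults to StreamDefaultDuplicatesWindow capped by MaxAge.
//
//	in := &StreamConfig{Name: "ORDERS", MaxAge: time.Minute}
//	out, _ := s.checkStreamCfg(in, acc)
//	// out.Storage == FileStorage, out.Replicas == 1, out.MaxMsgs == -1,
//	// out.Subjects == []string{"ORDERS"} (name used when no subjects given),
//	// out.Duplicates == time.Minute (MaxAge below the default 2m window)
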
// Config returns the stream's configuration.
func (mset *stream) config() StreamConfig {
	mset.mu.RLock()
	defer mset.mu.RUnlock()
	return mset.cfg
}

func (mset *stream) fileStoreConfig() (FileStoreConfig, error) {
	mset.mu.Lock()
	defer mset.mu.Unlock()
	fs, ok := mset.store.(*fileStore)
	if !ok {
		return FileStoreConfig{}, ErrStoreWrongType
	}
	return fs.fileStoreConfig(), nil
}

// Do not hold jsAccount or jetStream lock.
func (jsa *jsAccount) configUpdateCheck(old, new *StreamConfig, s *Server) (*StreamConfig, error) {
	cfg, apiErr := s.checkStreamCfg(new, jsa.acc())
	if apiErr != nil {
		return nil, apiErr
	}

	// Name must match.
	if cfg.Name != old.Name {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration name must match original"))
	}
	// Can't change MaxConsumers for now.
	if cfg.MaxConsumers != old.MaxConsumers {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change MaxConsumers"))
	}
	// Can't change storage types.
	if cfg.Storage != old.Storage {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change storage type"))
	}
	// Can only change retention from limits to interest or back, not to/from work queue for now.
	if cfg.Retention != old.Retention {
		if old.Retention == WorkQueuePolicy || cfg.Retention == WorkQueuePolicy {
			return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not change retention policy to/from workqueue"))
		}
	}
	// Can not have a template owner for now.
	if old.Template != _EMPTY_ {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update not allowed on template owned stream"))
	}
	if cfg.Template != _EMPTY_ {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not be owned by a template"))
	}
	// Can not change from true to false.
	if !cfg.Sealed && old.Sealed {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not unseal a sealed stream"))
	}
	// Can not change from true to false.
	if !cfg.DenyDelete && old.DenyDelete {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not cancel deny message deletes"))
	}
	// Can not change from true to false.
	if !cfg.DenyPurge && old.DenyPurge {
		return nil, NewJSStreamInvalidConfigError(fmt.Errorf("stream configuration update can not cancel deny purge"))
	}
	// Check for mirror changes which are not allowed.
	if !reflect.DeepEqual(cfg.Mirror, old.Mirror) {
		return nil, NewJSStreamMirrorNotUpdatableError()
	}

	// Check on new discard new per subject.
	if cfg.DiscardNewPer {
		if cfg.Discard != DiscardNew {
			return nil, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires discard new policy to be set"))
		}
		if cfg.MaxMsgsPer <= 0 {
			return nil, NewJSStreamInvalidConfigError(fmt.Errorf("discard new per subject requires max msgs per subject > 0"))
		}
	}

	// Do some adjustments for being sealed.
	if cfg.Sealed {
		cfg.MaxAge = 0
		cfg.Discard = DiscardNew
		cfg.DenyDelete, cfg.DenyPurge = true, true
		cfg.AllowRollup = false
	}

	// Check limits. We need some extra handling to allow updating MaxBytes.

	// First, let's calculate the difference between the new and old MaxBytes.
	maxBytesDiff := cfg.MaxBytes - old.MaxBytes
	if maxBytesDiff < 0 {
		// If we're updating to a lower MaxBytes (maxBytesDiff is negative),
		// then set to zero so checkBytesLimits doesn't set addBytes to 1.
		maxBytesDiff = 0
	}
	// If maxBytesDiff == 0, then that means MaxBytes didn't change.
	// If maxBytesDiff > 0, then we want to reserve additional bytes.

	// Save the user configured MaxBytes.
	newMaxBytes := cfg.MaxBytes
	maxBytesOffset := int64(0)

	// We temporarily set cfg.MaxBytes to maxBytesDiff because checkAllLimits
	// adds cfg.MaxBytes to the current reserved limit and checks if we've gone
	// over. However, we don't want an additional cfg.MaxBytes, we only want to
	// reserve the difference between the new and the old values.
	cfg.MaxBytes = maxBytesDiff

	// Check limits.
	js, isClustered := jsa.jetStreamAndClustered()
	jsa.mu.RLock()
	acc := jsa.account
	jsa.usageMu.RLock()
	selected, tier, hasTier := jsa.selectLimits(&cfg)
	if !hasTier && old.Replicas != cfg.Replicas {
		selected, tier, hasTier = jsa.selectLimits(old)
	}
	jsa.usageMu.RUnlock()
	reserved := int64(0)
	if !isClustered {
		reserved = jsa.tieredReservation(tier, &cfg)
	}
	jsa.mu.RUnlock()
	if !hasTier {
		return nil, NewJSNoLimitsError()
	}
	js.mu.RLock()
	defer js.mu.RUnlock()
	if isClustered {
		_, reserved = tieredStreamAndReservationCount(js.cluster.streams[acc.Name], tier, &cfg)
	}
	// The reservation does not account for this stream, hence add the old value.
	if tier == _EMPTY_ && old.Replicas > 1 {
		reserved += old.MaxBytes * int64(old.Replicas)
	} else {
		reserved += old.MaxBytes
	}
	if err := js.checkAllLimits(&selected, &cfg, reserved, maxBytesOffset); err != nil {
		return nil, err
	}
	// Restore the user configured MaxBytes.
	cfg.MaxBytes = newMaxBytes
	return &cfg, nil
}

// Update will allow certain configuration properties of an existing stream to be updated.
func (mset *stream) update(config *StreamConfig) error {
	return mset.updateWithAdvisory(config, true)
}

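// Worked example for the MaxBytes handling in configUpdateCheck above
// (hand-computed): growing a stream from MaxBytes = 1GiB to 2GiB yields
// maxBytesDiff = 1GiB, so only that delta is checked against the remaining
// account reservation; shrinking, or leaving MaxBytes unchanged, yields
// maxBytesDiff = 0 and no additional bytes need to be reserved.
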
// updateWithAdvisory will allow certain configuration properties of an existing stream to be updated.
func (mset *stream) updateWithAdvisory(config *StreamConfig, sendAdvisory bool) error {
	_, jsa, err := mset.acc.checkForJetStream()
	if err != nil {
		return err
	}

	mset.mu.RLock()
	ocfg := mset.cfg
	s := mset.srv
	mset.mu.RUnlock()

	cfg, err := mset.jsa.configUpdateCheck(&ocfg, config, s)
	if err != nil {
		return NewJSStreamInvalidConfigError(err, Unless(err))
	}

	// In the event that some of the stream-level limits have changed, yell appropriately
	// if any of the consumers exceed that limit.
	updateLimits := ocfg.ConsumerLimits.InactiveThreshold != cfg.ConsumerLimits.InactiveThreshold ||
		ocfg.ConsumerLimits.MaxAckPending != cfg.ConsumerLimits.MaxAckPending
	if updateLimits {
		var errorConsumers []string
		consumers := map[string]*ConsumerConfig{}
		if mset.js.isClustered() {
			for _, c := range mset.sa.consumers {
				consumers[c.Name] = c.Config
			}
		} else {
			for _, c := range mset.consumers {
				consumers[c.name] = &c.cfg
			}
		}
		for name, ccfg := range consumers {
			if ccfg.InactiveThreshold > cfg.ConsumerLimits.InactiveThreshold ||
				ccfg.MaxAckPending > cfg.ConsumerLimits.MaxAckPending {
				errorConsumers = append(errorConsumers, name)
			}
		}
		if len(errorConsumers) > 0 {
			// TODO(nat): Return a parsable error so that we can surface something
			// sensible through the JS API.
			return fmt.Errorf("change to limits violates consumers: %s", strings.Join(errorConsumers, ", "))
		}
	}

	jsa.mu.RLock()
	if jsa.subjectsOverlap(cfg.Subjects, mset) {
		jsa.mu.RUnlock()
		return NewJSStreamSubjectOverlapError()
	}
	jsa.mu.RUnlock()

	mset.mu.Lock()
	if mset.isLeader() {
		// Now check for subject interest differences.
		current := make(map[string]struct{}, len(ocfg.Subjects))
		for _, s := range ocfg.Subjects {
			current[s] = struct{}{}
		}
		// Update config with new values. The store update will enforce any stricter limits.

		// Now walk new subjects. All of these need to be added, but we will check
		// the originals first, since if it is in there we can skip, already added.
		for _, s := range cfg.Subjects {
			if _, ok := current[s]; !ok {
				if _, err := mset.subscribeInternal(s, mset.processInboundJetStreamMsg); err != nil {
					mset.mu.Unlock()
					return err
				}
			}
			delete(current, s)
		}
		// What is left in current needs to be deleted.
		for s := range current {
			if err := mset.unsubscribeInternal(s); err != nil {
				mset.mu.Unlock()
				return err
			}
		}

		// Check for a change in the duplicates window.
		if cfg.Duplicates != ocfg.Duplicates && mset.ddtmr != nil {
			// Let it fire right away, it will adjust properly on purge.
			mset.ddtmr.Reset(time.Microsecond)
		}

		// Check for Sources.
1780 		if len(cfg.Sources) > 0 || len(ocfg.Sources) > 0 {
1781 			currentIName := make(map[string]struct{})
1782 			needsStartingSeqNum := make(map[string]struct{})
1783 
1784 			for _, s := range ocfg.Sources {
1785 				currentIName[s.iname] = struct{}{}
1786 			}
1787 			for _, s := range cfg.Sources {
1788 				s.setIndexName()
1789 				if _, ok := currentIName[s.iname]; !ok {
1790 					// A new source.
1791 					if mset.sources == nil {
1792 						mset.sources = make(map[string]*sourceInfo)
1793 					}
1794 					mset.cfg.Sources = append(mset.cfg.Sources, s)
1795 
1796 					var si *sourceInfo
1797 
1798 					if len(s.SubjectTransforms) == 0 {
1799 						si = &sourceInfo{name: s.Name, iname: s.iname, sf: s.FilterSubject}
1800 					} else {
1801 						si = &sourceInfo{name: s.Name, iname: s.iname}
1802 						si.trs = make([]*subjectTransform, len(s.SubjectTransforms))
1803 						si.sfs = make([]string, len(s.SubjectTransforms))
1804 						for i := range s.SubjectTransforms {
1805 							// err can be ignored as already validated in config check
1806 							si.sfs[i] = s.SubjectTransforms[i].Source
1807 							var err error
1808 							si.trs[i], err = NewSubjectTransform(s.SubjectTransforms[i].Source, s.SubjectTransforms[i].Destination)
1809 							if err != nil {
1810 								// Log only; we still hold the lock here, so do not unlock and keep going.
1811 								mset.srv.Errorf("Unable to get subject transform for source: %v", err)
1812 							}
1813 						}
1814 					}
1815 
1816 					mset.sources[s.iname] = si
1817 					needsStartingSeqNum[s.iname] = struct{}{}
1818 				} else {
1819 					// The source already exists.
1820 					delete(currentIName, s.iname)
1821 				}
1822 			}
1823 			// What is left in currentIName needs to be deleted.
1824 			for iName := range currentIName {
1825 				mset.cancelSourceConsumer(iName)
1826 				delete(mset.sources, iName)
1827 			}
1828 			neededCopy := make(map[string]struct{}, len(needsStartingSeqNum))
1829 			for iName := range needsStartingSeqNum {
1830 				neededCopy[iName] = struct{}{}
1831 			}
1832 			mset.setStartingSequenceForSources(needsStartingSeqNum)
1833 			for iName := range neededCopy {
1834 				mset.setSourceConsumer(iName, mset.sources[iName].sseq+1, time.Time{})
1835 			}
1836 		}
1837 	}
1838 
1839 	// Check for a change in allow direct status.
1840 	// These will run on all members, so just update as appropriate here.
1841 	// We do make sure we are caught up under monitorStream() during initial startup.
1842 	if cfg.AllowDirect != ocfg.AllowDirect {
1843 		if cfg.AllowDirect {
1844 			mset.subscribeToDirect()
1845 		} else {
1846 			mset.unsubscribeToDirect()
1847 		}
1848 	}
1849 
1850 	// Check for changes to RePublish.
1851 	if cfg.RePublish != nil {
1852 		// Empty is the same as all.
1853 		if cfg.RePublish.Source == _EMPTY_ {
1854 			cfg.RePublish.Source = fwcs
1855 		}
1856 		if cfg.RePublish.Destination == _EMPTY_ {
1857 			cfg.RePublish.Destination = fwcs
1858 		}
1859 		tr, err := NewSubjectTransform(cfg.RePublish.Source, cfg.RePublish.Destination)
1860 		if err != nil {
1861 			mset.mu.Unlock()
1862 			return fmt.Errorf("stream configuration for republish from '%s' to '%s': %w", cfg.RePublish.Source, cfg.RePublish.Destination, err)
1863 		}
1864 		// Assign our transform for republishing.
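		// For example (illustrative values only): with Source "orders.*" and
		// Destination "repub.orders.{{wildcard(1)}}", a message stored on
		// "orders.new" would be republished on "repub.orders.new". An empty
		// Source or Destination was normalized to the full wildcard above.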
1865 mset.tr = tr 1866 } else { 1867 mset.tr = nil 1868 } 1869 1870 // Check for changes to subject transform 1871 if ocfg.SubjectTransform == nil && cfg.SubjectTransform != nil { 1872 tr, err := NewSubjectTransform(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination) 1873 if err != nil { 1874 mset.mu.Unlock() 1875 return fmt.Errorf("stream configuration for subject transform from '%s' to '%s': %w", cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination, err) 1876 } 1877 mset.itr = tr 1878 } else if ocfg.SubjectTransform != nil && cfg.SubjectTransform != nil && 1879 (ocfg.SubjectTransform.Source != cfg.SubjectTransform.Source || ocfg.SubjectTransform.Destination != cfg.SubjectTransform.Destination) { 1880 tr, err := NewSubjectTransform(cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination) 1881 if err != nil { 1882 mset.mu.Unlock() 1883 return fmt.Errorf("stream configuration for subject transform from '%s' to '%s': %w", cfg.SubjectTransform.Source, cfg.SubjectTransform.Destination, err) 1884 } 1885 mset.itr = tr 1886 } else if ocfg.SubjectTransform != nil && cfg.SubjectTransform == nil { 1887 mset.itr = nil 1888 } 1889 1890 js := mset.js 1891 1892 if targetTier := tierName(cfg); mset.tier != targetTier { 1893 // In cases such as R1->R3, only one update is needed 1894 jsa.usageMu.RLock() 1895 _, ok := jsa.limits[targetTier] 1896 jsa.usageMu.RUnlock() 1897 if ok { 1898 // error never set 1899 _, reported, _ := mset.store.Utilization() 1900 jsa.updateUsage(mset.tier, mset.stype, -int64(reported)) 1901 jsa.updateUsage(targetTier, mset.stype, int64(reported)) 1902 mset.tier = targetTier 1903 } 1904 // else in case the new tier does not exist (say on move), keep the old tier around 1905 // a subsequent update to an existing tier will then move from existing past tier to existing new tier 1906 } 1907 1908 if mset.isLeader() && mset.sa != nil && ocfg.Retention != cfg.Retention && cfg.Retention == InterestPolicy { 1909 // Before we can update the retention policy for the consumer, we need 1910 // the replica count of all consumers to match the stream. 1911 for _, c := range mset.sa.consumers { 1912 if c.Config.Replicas > 0 && c.Config.Replicas != cfg.Replicas { 1913 mset.mu.Unlock() 1914 return fmt.Errorf("consumer %q replica count must be %d", c.Name, cfg.Replicas) 1915 } 1916 } 1917 } 1918 1919 // Now update config and store's version of our config. 1920 mset.cfg = *cfg 1921 1922 // If we're changing retention and haven't errored because of consumer 1923 // replicas by now, whip through and update the consumer retention. 1924 if ocfg.Retention != cfg.Retention && cfg.Retention == InterestPolicy { 1925 toUpdate := make([]*consumer, 0, len(mset.consumers)) 1926 for _, c := range mset.consumers { 1927 toUpdate = append(toUpdate, c) 1928 } 1929 mset.mu.Unlock() 1930 for _, c := range toUpdate { 1931 c.mu.Lock() 1932 c.retention = cfg.Retention 1933 c.mu.Unlock() 1934 if c.retention == InterestPolicy { 1935 // If we're switching to interest, force a check of the 1936 // interest of existing stream messages. 1937 c.checkStateForInterestStream() 1938 } 1939 } 1940 mset.mu.Lock() 1941 } 1942 1943 // If we are the leader never suppress update advisory, simply send. 
1944 	if mset.isLeader() && sendAdvisory {
1945 		mset.sendUpdateAdvisoryLocked()
1946 	}
1947 	mset.mu.Unlock()
1948 
1949 	if js != nil {
1950 		maxBytesDiff := cfg.MaxBytes - ocfg.MaxBytes
1951 		if maxBytesDiff > 0 {
1952 			// Reserve the difference.
1953 			js.reserveStreamResources(&StreamConfig{
1954 				MaxBytes: maxBytesDiff,
1955 				Storage:  cfg.Storage,
1956 			})
1957 		} else if maxBytesDiff < 0 {
1958 			// Release the difference.
1959 			js.releaseStreamResources(&StreamConfig{
1960 				MaxBytes: -maxBytesDiff,
1961 				Storage:  ocfg.Storage,
1962 			})
1963 		}
1964 	}
1965 
1966 	mset.store.UpdateConfig(cfg)
1967 
1968 	return nil
1969 }
1970 
1971 // Purge will remove all messages from the stream and underlying store based on the request.
1972 func (mset *stream) purge(preq *JSApiStreamPurgeRequest) (purged uint64, err error) {
1973 	mset.mu.RLock()
1974 	if mset.closed.Load() {
1975 		mset.mu.RUnlock()
1976 		return 0, errStreamClosed
1977 	}
1978 	if mset.cfg.Sealed {
1979 		mset.mu.RUnlock()
1980 		return 0, errors.New("sealed stream")
1981 	}
1982 	store, mlseq := mset.store, mset.lseq
1983 	mset.mu.RUnlock()
1984 
1985 	if preq != nil {
1986 		purged, err = mset.store.PurgeEx(preq.Subject, preq.Sequence, preq.Keep)
1987 	} else {
1988 		purged, err = mset.store.Purge()
1989 	}
1990 	if err != nil {
1991 		return purged, err
1992 	}
1993 
1994 	// Grab our stream state.
1995 	var state StreamState
1996 	store.FastState(&state)
1997 	fseq, lseq := state.FirstSeq, state.LastSeq
1998 
1999 	// Check if our last sequence has moved past what our original last sequence was, and if so reset.
2000 	if lseq > mlseq {
2001 		mset.setLastSeq(lseq)
2002 	}
2003 
2004 	// Purge consumers.
2005 	// Check for filtered purge.
2006 	if preq != nil && preq.Subject != _EMPTY_ {
2007 		ss := store.FilteredState(fseq, preq.Subject)
2008 		fseq = ss.First
2009 	}
2010 
2011 	mset.clsMu.RLock()
2012 	for _, o := range mset.cList {
2013 		start := fseq
2014 		o.mu.RLock()
2015 		// We update consumer sequences if:
2016 		// no subject was specified, so we can purge all consumer sequences,
2017 		doPurge := preq == nil ||
2018 			preq.Subject == _EMPTY_ ||
2019 			// or the consumer filter subject is equal to the purged subject,
2020 			// or the consumer filter subject is a subset of the purged subject,
2021 			// but not the other way around.
2022 			o.isEqualOrSubsetMatch(preq.Subject)
2023 		// Check if a consumer has a wider subject space than what we purged.
2024 		var isWider bool
2025 		if !doPurge && preq != nil && o.isFilteredMatch(preq.Subject) {
2026 			doPurge, isWider = true, true
2027 			start = state.FirstSeq
2028 		}
2029 		o.mu.RUnlock()
2030 		if doPurge {
2031 			o.purge(start, lseq, isWider)
2032 		}
2033 	}
2034 	mset.clsMu.RUnlock()
2035 
2036 	return purged, nil
2037 }
2038 
2039 // RemoveMsg will remove a message from a stream.
2040 // FIXME(dlc) - Should pick one and be consistent.
2041 func (mset *stream) removeMsg(seq uint64) (bool, error) {
2042 	return mset.deleteMsg(seq)
2043 }
2044 
2045 // DeleteMsg will remove a message from a stream.
2046 func (mset *stream) deleteMsg(seq uint64) (bool, error) {
2047 	if mset.closed.Load() {
2048 		return false, errStreamClosed
2049 	}
2050 	return mset.store.RemoveMsg(seq)
2051 }
2052 
2053 // EraseMsg will securely remove a message and rewrite the data with random data.
2054 func (mset *stream) eraseMsg(seq uint64) (bool, error) {
2055 	if mset.closed.Load() {
2056 		return false, errStreamClosed
2057 	}
2058 	return mset.store.EraseMsg(seq)
2059 }
2060 
2061 // Are we a mirror?
2062 func (mset *stream) isMirror() bool { 2063 mset.mu.RLock() 2064 defer mset.mu.RUnlock() 2065 return mset.cfg.Mirror != nil 2066 } 2067 2068 func (mset *stream) sourcesInfo() (sis []*StreamSourceInfo) { 2069 mset.mu.RLock() 2070 defer mset.mu.RUnlock() 2071 for _, si := range mset.sources { 2072 sis = append(sis, mset.sourceInfo(si)) 2073 } 2074 return sis 2075 } 2076 2077 // Lock should be held 2078 func (mset *stream) sourceInfo(si *sourceInfo) *StreamSourceInfo { 2079 if si == nil { 2080 return nil 2081 } 2082 2083 var ssi = StreamSourceInfo{Name: si.name, Lag: si.lag, Error: si.err, FilterSubject: si.sf} 2084 2085 trConfigs := make([]SubjectTransformConfig, len(si.sfs)) 2086 for i := range si.sfs { 2087 destination := _EMPTY_ 2088 if si.trs[i] != nil { 2089 destination = si.trs[i].dest 2090 } 2091 trConfigs[i] = SubjectTransformConfig{si.sfs[i], destination} 2092 } 2093 2094 ssi.SubjectTransforms = trConfigs 2095 2096 // If we have not heard from the source, set Active to -1. 2097 if si.last.IsZero() { 2098 ssi.Active = -1 2099 } else { 2100 ssi.Active = time.Since(si.last) 2101 } 2102 2103 var ext *ExternalStream 2104 if mset.cfg.Mirror != nil { 2105 ext = mset.cfg.Mirror.External 2106 } else if ss := mset.streamSource(si.iname); ss != nil && ss.External != nil { 2107 ext = ss.External 2108 } 2109 if ext != nil { 2110 ssi.External = &ExternalStream{ 2111 ApiPrefix: ext.ApiPrefix, 2112 DeliverPrefix: ext.DeliverPrefix, 2113 } 2114 } 2115 return &ssi 2116 } 2117 2118 // Return our source info for our mirror. 2119 func (mset *stream) mirrorInfo() *StreamSourceInfo { 2120 mset.mu.RLock() 2121 defer mset.mu.RUnlock() 2122 return mset.sourceInfo(mset.mirror) 2123 } 2124 2125 const sourceHealthCheckInterval = 1 * time.Second 2126 2127 // Will run as a Go routine to process mirror consumer messages. 2128 func (mset *stream) processMirrorMsgs(mirror *sourceInfo, ready *sync.WaitGroup) { 2129 s := mset.srv 2130 defer func() { 2131 mirror.wg.Done() 2132 s.grWG.Done() 2133 }() 2134 2135 // Grab stream quit channel. 2136 mset.mu.Lock() 2137 msgs, qch, siqch := mirror.msgs, mset.qch, mirror.qch 2138 // Set the last seen as now so that we don't fail at the first check. 2139 mirror.last = time.Now() 2140 mset.mu.Unlock() 2141 2142 // Signal the caller that we have captured the above fields. 2143 ready.Done() 2144 2145 // Make sure we have valid ipq for msgs. 2146 if msgs == nil { 2147 mset.mu.Lock() 2148 mset.cancelMirrorConsumer() 2149 mset.mu.Unlock() 2150 return 2151 } 2152 2153 t := time.NewTicker(sourceHealthCheckInterval) 2154 defer t.Stop() 2155 2156 for { 2157 select { 2158 case <-s.quitCh: 2159 return 2160 case <-qch: 2161 return 2162 case <-siqch: 2163 return 2164 case <-msgs.ch: 2165 ims := msgs.pop() 2166 for _, im := range ims { 2167 if !mset.processInboundMirrorMsg(im) { 2168 break 2169 } 2170 } 2171 msgs.recycle(&ims) 2172 case <-t.C: 2173 mset.mu.RLock() 2174 isLeader := mset.isLeader() 2175 stalled := mset.mirror != nil && time.Since(mset.mirror.last) > 3*sourceHealthCheckInterval 2176 mset.mu.RUnlock() 2177 // No longer leader. 2178 if !isLeader { 2179 mset.mu.Lock() 2180 mset.cancelMirrorConsumer() 2181 mset.mu.Unlock() 2182 return 2183 } 2184 // We are stalled. 2185 if stalled { 2186 mset.retryMirrorConsumer() 2187 } 2188 } 2189 } 2190 } 2191 2192 // Checks that the message is from our current direct consumer. We can not depend on sub comparison 2193 // since cross account imports break. 
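// An ack reply subject has the form
//
//	$JS.ACK.<stream>.<consumer>.<delivered>.<sseq>.<dseq>.<ts>.<pending>
//
// so tokenAt(reply, 4) extracts the consumer name, which is compared against
// the consumer we created (si.cname).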
2194 func (si *sourceInfo) isCurrentSub(reply string) bool {
2195 	return si.cname != _EMPTY_ && strings.HasPrefix(reply, jsAckPre) && si.cname == tokenAt(reply, 4)
2196 }
2197 
2198 // processInboundMirrorMsg handles processing messages bound for a stream.
2199 func (mset *stream) processInboundMirrorMsg(m *inMsg) bool {
2200 	mset.mu.Lock()
2201 	if mset.mirror == nil {
2202 		mset.mu.Unlock()
2203 		return false
2204 	}
2205 	if !mset.isLeader() {
2206 		mset.cancelMirrorConsumer()
2207 		mset.mu.Unlock()
2208 		return false
2209 	}
2210 
2211 	isControl := m.isControlMsg()
2212 
2213 	// Ignore from old subscriptions.
2214 	// The reason we can not just compare subs is that on cross account imports they will not match.
2215 	if !mset.mirror.isCurrentSub(m.rply) && !isControl {
2216 		mset.mu.Unlock()
2217 		return false
2218 	}
2219 
2220 	mset.mirror.last = time.Now()
2221 	node := mset.node
2222 
2223 	// Check for heartbeats and flow control messages.
2224 	if isControl {
2225 		var needsRetry bool
2226 		// Flow controls have reply subjects.
2227 		if m.rply != _EMPTY_ {
2228 			mset.handleFlowControl(mset.mirror, m)
2229 		} else {
2230 			// For idle heartbeats make sure we did not miss anything and check if we are considered stalled.
2231 			if ldseq := parseInt64(getHeader(JSLastConsumerSeq, m.hdr)); ldseq > 0 && uint64(ldseq) != mset.mirror.dseq {
2232 				needsRetry = true
2233 			} else if fcReply := getHeader(JSConsumerStalled, m.hdr); len(fcReply) > 0 {
2234 				// The other side thinks we are stalled, so send a flow control reply.
2235 				mset.outq.sendMsg(string(fcReply), nil)
2236 			}
2237 		}
2238 		mset.mu.Unlock()
2239 		if needsRetry {
2240 			mset.retryMirrorConsumer()
2241 		}
2242 		return !needsRetry
2243 	}
2244 
2245 	sseq, dseq, dc, ts, pending := replyInfo(m.rply)
2246 
2247 	if dc > 1 {
2248 		mset.mu.Unlock()
2249 		return false
2250 	}
2251 
2252 	// Mirror info tracking.
2253 	olag, osseq, odseq := mset.mirror.lag, mset.mirror.sseq, mset.mirror.dseq
2254 	if sseq == mset.mirror.sseq+1 {
2255 		mset.mirror.dseq = dseq
2256 		mset.mirror.sseq++
2257 	} else if sseq <= mset.mirror.sseq {
2258 		// Ignore older messages.
2259 		mset.mu.Unlock()
2260 		return true
2261 	} else if mset.mirror.cname == _EMPTY_ {
2262 		mset.mirror.cname = tokenAt(m.rply, 4)
2263 		mset.mirror.dseq, mset.mirror.sseq = dseq, sseq
2264 	} else {
2265 		// If the deliver sequence matches then the upstream stream has expired or deleted messages.
2266 		if dseq == mset.mirror.dseq+1 {
2267 			mset.skipMsgs(mset.mirror.sseq+1, sseq-1)
2268 			mset.mirror.dseq++
2269 			mset.mirror.sseq = sseq
2270 		} else {
2271 			mset.mu.Unlock()
2272 			mset.retryMirrorConsumer()
2273 			return false
2274 		}
2275 	}
2276 
2277 	if pending == 0 {
2278 		mset.mirror.lag = 0
2279 	} else {
2280 		mset.mirror.lag = pending - 1
2281 	}
2282 
2283 	// Check if we allow mirror direct here. If so, check that we have mostly caught up.
2284 	// The reason we do not require 0 is that if the source is active we may always be slightly behind.
2285 	if mset.cfg.MirrorDirect && mset.mirror.dsub == nil && pending < dgetCaughtUpThresh {
2286 		if err := mset.subscribeToMirrorDirect(); err != nil {
2287 			// Disable since we had problems above.
2288 			mset.cfg.MirrorDirect = false
2289 		}
2290 	}
2291 
2292 	// Do the subject transform if there's one.
2293 
2294 	for _, tr := range mset.mirror.trs {
2295 		if tr == nil {
2296 			continue
2297 		} else {
2298 			tsubj, err := tr.Match(m.subj)
2299 			if err == nil {
2300 				m.subj = tsubj
2301 				break
2302 			}
2303 		}
2304 	}
2305 
2306 	s, js, stype := mset.srv, mset.js, mset.cfg.Storage
2307 	mset.mu.Unlock()
2308 
2309 	var err error
2310 	if node != nil {
2311 		if js.limitsExceeded(stype) {
2312 			s.resourcesExceededError()
2313 			err = ApiErrors[JSInsufficientResourcesErr]
2314 		} else {
2315 			err = node.Propose(encodeStreamMsg(m.subj, _EMPTY_, m.hdr, m.msg, sseq-1, ts))
2316 		}
2317 	} else {
2318 		err = mset.processJetStreamMsg(m.subj, _EMPTY_, m.hdr, m.msg, sseq-1, ts, nil)
2319 	}
2320 	if err != nil {
2321 		if strings.Contains(err.Error(), "no space left") {
2322 			s.Errorf("JetStream out of space, will be DISABLED")
2323 			s.DisableJetStream()
2324 			return false
2325 		}
2326 		if err != errLastSeqMismatch {
2327 			mset.mu.RLock()
2328 			accName, sname := mset.acc.Name, mset.cfg.Name
2329 			mset.mu.RUnlock()
2330 			s.RateLimitWarnf("Error processing inbound mirror message for '%s' > '%s': %v",
2331 				accName, sname, err)
2332 		} else {
2333 			// We may have missed messages, restart.
2334 			if sseq <= mset.lastSeq() {
2335 				mset.mu.Lock()
2336 				mset.mirror.lag = olag
2337 				mset.mirror.sseq = osseq
2338 				mset.mirror.dseq = odseq
2339 				mset.mu.Unlock()
2340 				return false
2341 			} else {
2342 				mset.mu.Lock()
2343 				mset.mirror.dseq = odseq
2344 				mset.mirror.sseq = osseq
2345 				mset.mu.Unlock()
2346 				mset.retryMirrorConsumer()
2347 			}
2348 		}
2349 	}
2350 	return err == nil
2351 }
2352 
2353 func (mset *stream) setMirrorErr(err *ApiError) {
2354 	mset.mu.Lock()
2355 	if mset.mirror != nil {
2356 		mset.mirror.err = err
2357 	}
2358 	mset.mu.Unlock()
2359 }
2360 
2361 // Cancels a mirror consumer.
2362 //
2363 // Lock held on entry.
2364 func (mset *stream) cancelMirrorConsumer() {
2365 	if mset.mirror == nil {
2366 		return
2367 	}
2368 	mset.cancelSourceInfo(mset.mirror)
2369 }
2370 
2371 // Similar to setupMirrorConsumer except that it will print a debug statement
2372 // indicating that there is a retry.
2373 //
2374 // Lock is acquired in this function.
2375 func (mset *stream) retryMirrorConsumer() error {
2376 	mset.mu.Lock()
2377 	defer mset.mu.Unlock()
2378 	mset.srv.Debugf("Retrying mirror consumer for '%s > %s'", mset.acc.Name, mset.cfg.Name)
2379 	return mset.setupMirrorConsumer()
2380 }
2381 
2382 // Lock should be held.
2383 func (mset *stream) skipMsgs(start, end uint64) {
2384 	node, store := mset.node, mset.store
2385 	// If we are not clustered we can short circuit now with store.SkipMsgs.
2386 	if node == nil {
2387 		store.SkipMsgs(start, end-start+1)
2388 		mset.lseq = end
2389 		return
2390 	}
2391 
2392 	// FIXME (dlc) - We should allow proposals of DeleteRange, but would need to make sure all peers support it.
2393 	// With syncRequest it was easy to add a bool into the request.
2394 	var entries []*Entry
2395 	for seq := start; seq <= end; seq++ {
2396 		entries = append(entries, &Entry{EntryNormal, encodeStreamMsg(_EMPTY_, _EMPTY_, nil, nil, seq-1, 0)})
2397 		// So a single message does not get too big.
2398 		if len(entries) > 10_000 {
2399 			node.ProposeDirect(entries)
2400 			// We need to re-create `entries` (rather than truncate it) because
2401 			// the node's pae map still holds a reference to the old slice.
2402 			entries = nil
2403 		}
2404 	}
2405 	// Send all at once.
2406 	if len(entries) > 0 {
2407 		node.ProposeDirect(entries)
2408 	}
2409 }
2410 
2411 const (
2412 	// Base retry backoff duration.
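	// With these values, calculateRetryBackoff below yields fails*2 times this
	// base: e.g. 3 failures gives 30s, and 12 or more failures hits the 2
	// minute maximum cap.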
2413 	retryBackOff = 5 * time.Second
2414 	// Maximum amount we will wait.
2415 	retryMaximum = 2 * time.Minute
2416 )
2417 
2418 // Calculate our backoff based on number of failures.
2419 func calculateRetryBackoff(fails int) time.Duration {
2420 	backoff := time.Duration(retryBackOff) * time.Duration(fails*2)
2421 	if backoff > retryMaximum {
2422 		backoff = retryMaximum
2423 	}
2424 	return backoff
2425 }
2426 
2427 // This will schedule a call to setupMirrorConsumer, taking into account the last
2428 // time it was retried, and determines the soonest setupMirrorConsumer can be called
2429 // without tripping the sourceConsumerRetryThreshold. We will also take into account
2430 // the number of failures and back off our retries accordingly.
2431 // The mset.mirror pointer has been verified to be not nil by the caller.
2432 //
2433 // Lock held on entry.
2434 func (mset *stream) scheduleSetupMirrorConsumerRetry() {
2435 	// We are trying to figure out how soon we can retry. setupMirrorConsumer will reject
2436 	// a retry if the last one was done less than "sourceConsumerRetryThreshold" ago.
2437 	next := sourceConsumerRetryThreshold - time.Since(mset.mirror.lreq)
2438 	if next < 0 {
2439 		// It means that we have passed the threshold and so we are ready to go.
2440 		next = 0
2441 	}
2442 	// Take into account failures here.
2443 	next += calculateRetryBackoff(mset.mirror.fails)
2444 
2445 	// Add some jitter.
2446 	next += time.Duration(rand.Intn(int(100*time.Millisecond))) + 100*time.Millisecond
2447 
2448 	time.AfterFunc(next, func() {
2449 		mset.mu.Lock()
2450 		mset.setupMirrorConsumer()
2451 		mset.mu.Unlock()
2452 	})
2453 }
2454 
2455 // Setup our mirror consumer.
2456 // Lock should be held.
2457 func (mset *stream) setupMirrorConsumer() error {
2458 	if mset.closed.Load() {
2459 		return errStreamClosed
2460 	}
2461 	if mset.outq == nil {
2462 		return errors.New("outq required")
2463 	}
2464 	// We used to prevent the update of a mirror configuration in cluster
2465 	// mode but not in standalone. This is now fixed. However, without
2466 	// rejecting the update, it could be that if the source stream was
2467 	// removed and then later the mirrored stream config changed to
2468 	// remove the mirror configuration, this function would panic when
2469 	// accessing mset.cfg.Mirror fields. Adding this protection in case
2470 	// we allow in the future the mirror config to be changed (removed).
2471 	if mset.cfg.Mirror == nil {
2472 		return errors.New("invalid mirror configuration")
2473 	}
2474 
2475 	// If this is the first time.
2476 	if mset.mirror == nil {
2477 		mset.mirror = &sourceInfo{name: mset.cfg.Mirror.Name}
2478 	} else {
2479 		mset.cancelSourceInfo(mset.mirror)
2480 		mset.mirror.sseq = mset.lseq
2481 
2482 		// If we are no longer the leader stop trying.
2483 		if !mset.isLeader() {
2484 			return nil
2485 		}
2486 	}
2487 	mirror := mset.mirror
2488 
2489 	// We want to throttle here in terms of how fast we request new consumers,
2490 	// or if the previous request is still in progress.
2491 	if last := time.Since(mirror.lreq); last < sourceConsumerRetryThreshold || mirror.sip {
2492 		mset.scheduleSetupMirrorConsumerRetry()
2493 		return nil
2494 	}
2495 	mirror.lreq = time.Now()
2496 
2497 	// Determine subjects etc.
2498 	var deliverSubject string
2499 	ext := mset.cfg.Mirror.External
2500 
2501 	if ext != nil && ext.DeliverPrefix != _EMPTY_ {
2502 		deliverSubject = strings.ReplaceAll(ext.DeliverPrefix+syncSubject(".M"), "..", ".")
2503 	} else {
2504 		deliverSubject = syncSubject("$JS.M")
2505 	}
2506 
2507 	// Now send off request to create/update our consumer. This will be all API based even in single server mode.
2508 	// We calculate durable names a priori so we do not need to save them off.
2509 
2510 	var state StreamState
2511 	mset.store.FastState(&state)
2512 
2513 	req := &CreateConsumerRequest{
2514 		Stream: mset.cfg.Mirror.Name,
2515 		Config: ConsumerConfig{
2516 			DeliverSubject: deliverSubject,
2517 			DeliverPolicy:  DeliverByStartSequence,
2518 			OptStartSeq:    state.LastSeq + 1,
2519 			AckPolicy:      AckNone,
2520 			AckWait:        22 * time.Hour,
2521 			MaxDeliver:     1,
2522 			Heartbeat:      sourceHealthCheckInterval,
2523 			FlowControl:    true,
2524 			Direct:         true,
2525 		},
2526 	}
2527 
2528 	// Only use start optionals on the first time.
2529 	if state.Msgs == 0 && state.FirstSeq == 0 {
2530 		req.Config.OptStartSeq = 0
2531 		if mset.cfg.Mirror.OptStartSeq > 0 {
2532 			req.Config.OptStartSeq = mset.cfg.Mirror.OptStartSeq
2533 		} else if mset.cfg.Mirror.OptStartTime != nil {
2534 			req.Config.OptStartTime = mset.cfg.Mirror.OptStartTime
2535 			req.Config.DeliverPolicy = DeliverByStartTime
2536 		}
2537 	}
2538 	if req.Config.OptStartSeq == 0 && req.Config.OptStartTime == nil {
2539 		// If starting out and lastSeq is 0.
2540 		req.Config.DeliverPolicy = DeliverAll
2541 	}
2542 
2543 	// Filters.
2544 	if mset.cfg.Mirror.FilterSubject != _EMPTY_ {
2545 		req.Config.FilterSubject = mset.cfg.Mirror.FilterSubject
2546 		mirror.sf = mset.cfg.Mirror.FilterSubject
2547 	}
2548 
2549 	if lst := len(mset.cfg.Mirror.SubjectTransforms); lst > 0 {
2550 		sfs := make([]string, lst)
2551 		trs := make([]*subjectTransform, lst)
2552 
2553 		for i, tr := range mset.cfg.Mirror.SubjectTransforms {
2554 			// Will not fail, as the config check already validated that the transform works.
2555 			subjectTransform, err := NewSubjectTransform(tr.Source, tr.Destination)
2556 			if err != nil {
2557 				mset.srv.Errorf("Unable to get transform for mirror consumer: %v", err)
2558 			}
2559 			sfs[i] = tr.Source
2560 			trs[i] = subjectTransform
2561 		}
2562 		mirror.sfs = sfs
2563 		mirror.trs = trs
2564 		req.Config.FilterSubjects = sfs
2565 	}
2566 
2567 	respCh := make(chan *JSApiConsumerCreateResponse, 1)
2568 	reply := infoReplySubject()
2569 	crSub, err := mset.subscribeInternal(reply, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
2570 		mset.unsubscribe(sub)
2571 		_, msg := c.msgParts(rmsg)
2572 
2573 		var ccr JSApiConsumerCreateResponse
2574 		if err := json.Unmarshal(msg, &ccr); err != nil {
2575 			c.Warnf("JetStream bad mirror consumer create response: %q", msg)
2576 			mset.setMirrorErr(ApiErrors[JSInvalidJSONErr])
2577 			return
2578 		}
2579 		select {
2580 		case respCh <- &ccr:
2581 		default:
2582 		}
2583 	})
2584 	if err != nil {
2585 		mirror.err = NewJSMirrorConsumerSetupFailedError(err, Unless(err))
2586 		mset.scheduleSetupMirrorConsumerRetry()
2587 		return nil
2588 	}
2589 
2590 	b, _ := json.Marshal(req)
2591 
2592 	var subject string
2593 	if req.Config.FilterSubject != _EMPTY_ {
2594 		req.Config.Name = fmt.Sprintf("mirror-%s", createConsumerName())
2595 		subject = fmt.Sprintf(JSApiConsumerCreateExT, mset.cfg.Mirror.Name, req.Config.Name, req.Config.FilterSubject)
2596 	} else {
2597 		subject = fmt.Sprintf(JSApiConsumerCreateT, mset.cfg.Mirror.Name)
2598 	}
2599 	if ext != nil {
2600 		subject = strings.Replace(subject, JSApiPrefix, ext.ApiPrefix, 1)
2601 		subject = strings.ReplaceAll(subject, "..", ".")
2602 	}
2603 
2604 	// Reset.
2605 	mirror.msgs = nil
2606 	mirror.err = nil
2607 	mirror.sip = true
2608 
2609 	// Send the consumer create request.
2610 	mset.outq.send(newJSPubMsg(subject, _EMPTY_, reply, nil, b, nil, 0))
2611 
2612 	go func() {
2613 
2614 		var retry bool
2615 		defer
func() { 2616 mset.mu.Lock() 2617 // Check that this is still valid and if so, clear the "setup in progress" flag. 2618 if mset.mirror != nil { 2619 mset.mirror.sip = false 2620 // If we need to retry, schedule now 2621 if retry { 2622 mset.mirror.fails++ 2623 // Cancel here since we can not do anything with this consumer at this point. 2624 mset.cancelSourceInfo(mset.mirror) 2625 mset.scheduleSetupMirrorConsumerRetry() 2626 } else { 2627 // Clear on success. 2628 mset.mirror.fails = 0 2629 } 2630 } 2631 mset.mu.Unlock() 2632 }() 2633 2634 // Wait for previous processMirrorMsgs go routine to be completely done. 2635 // If none is running, this will not block. 2636 mirror.wg.Wait() 2637 2638 select { 2639 case ccr := <-respCh: 2640 mset.mu.Lock() 2641 // Mirror config has been removed. 2642 if mset.mirror == nil { 2643 mset.mu.Unlock() 2644 return 2645 } 2646 ready := sync.WaitGroup{} 2647 mirror := mset.mirror 2648 mirror.err = nil 2649 if ccr.Error != nil || ccr.ConsumerInfo == nil { 2650 mset.srv.Warnf("JetStream error response for create mirror consumer: %+v", ccr.Error) 2651 mirror.err = ccr.Error 2652 // Let's retry as soon as possible, but we are gated by sourceConsumerRetryThreshold 2653 retry = true 2654 mset.mu.Unlock() 2655 return 2656 } else { 2657 // Setup actual subscription to process messages from our source. 2658 qname := fmt.Sprintf("[ACC:%s] stream mirror '%s' of '%s' msgs", mset.acc.Name, mset.cfg.Name, mset.cfg.Mirror.Name) 2659 // Create a new queue each time 2660 mirror.msgs = newIPQueue[*inMsg](mset.srv, qname) 2661 msgs := mirror.msgs 2662 sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { 2663 hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy. 2664 mset.queueInbound(msgs, subject, reply, hdr, msg, nil) 2665 }) 2666 if err != nil { 2667 mirror.err = NewJSMirrorConsumerSetupFailedError(err, Unless(err)) 2668 retry = true 2669 mset.mu.Unlock() 2670 return 2671 } 2672 // Save our sub. 2673 mirror.sub = sub 2674 2675 // When an upstream stream expires messages or in general has messages that we want 2676 // that are no longer available we need to adjust here. 2677 var state StreamState 2678 mset.store.FastState(&state) 2679 2680 // Check if we need to skip messages. 2681 if state.LastSeq != ccr.ConsumerInfo.Delivered.Stream { 2682 // Check to see if delivered is past our last and we have no msgs. This will help the 2683 // case when mirroring a stream that has a very high starting sequence number. 2684 if state.Msgs == 0 && ccr.ConsumerInfo.Delivered.Stream > state.LastSeq { 2685 mset.store.PurgeEx(_EMPTY_, ccr.ConsumerInfo.Delivered.Stream+1, 0) 2686 mset.lseq = ccr.ConsumerInfo.Delivered.Stream 2687 } else { 2688 mset.skipMsgs(state.LastSeq+1, ccr.ConsumerInfo.Delivered.Stream) 2689 } 2690 } 2691 2692 // Capture consumer name. 2693 mirror.cname = ccr.ConsumerInfo.Name 2694 mirror.dseq = 0 2695 mirror.sseq = ccr.ConsumerInfo.Delivered.Stream 2696 mirror.qch = make(chan struct{}) 2697 mirror.wg.Add(1) 2698 ready.Add(1) 2699 if !mset.srv.startGoRoutine( 2700 func() { mset.processMirrorMsgs(mirror, &ready) }, 2701 pprofLabels{ 2702 "type": "mirror", 2703 "account": mset.acc.Name, 2704 "stream": mset.cfg.Name, 2705 "consumer": mirror.cname, 2706 }, 2707 ) { 2708 ready.Done() 2709 } 2710 } 2711 mset.mu.Unlock() 2712 ready.Wait() 2713 case <-time.After(5 * time.Second): 2714 mset.unsubscribe(crSub) 2715 // We already waited 5 seconds, let's retry now. 
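			// Setting retry here lets the deferred cleanup above bump the
			// failure count and schedule the next attempt.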
2716 			retry = true
2717 		}
2718 	}()
2719 
2720 	return nil
2721 }
2722 
2723 func (mset *stream) streamSource(iname string) *StreamSource {
2724 	for _, ssi := range mset.cfg.Sources {
2725 		if ssi.iname == iname {
2726 			return ssi
2727 		}
2728 	}
2729 	return nil
2730 }
2731 
2732 func (mset *stream) retrySourceConsumer(iName string) {
2733 	mset.mu.Lock()
2734 	defer mset.mu.Unlock()
2735 
2736 	si := mset.sources[iName]
2737 	if si == nil {
2738 		return
2739 	}
2740 	var ss = mset.streamSource(iName)
2741 	if ss != nil {
2742 		iNameMap := map[string]struct{}{
2743 			iName: {},
2744 		}
2745 		mset.setStartingSequenceForSources(iNameMap)
2746 		mset.retrySourceConsumerAtSeq(iName, si.sseq+1)
2747 	}
2748 }
2749 
2750 // Same as setSourceConsumer, but also issues a debug statement indicating
2751 // that there is a retry.
2752 //
2753 // Lock should be held.
2754 func (mset *stream) retrySourceConsumerAtSeq(iname string, seq uint64) {
2755 	s := mset.srv
2756 
2757 	s.Debugf("Retrying source consumer for '%s > %s'", mset.acc.Name, mset.cfg.Name)
2758 
2759 	// setSourceConsumer will check that the source is still configured.
2760 	mset.setSourceConsumer(iname, seq, time.Time{})
2761 }
2762 
2763 // Lock should be held.
2764 func (mset *stream) cancelSourceConsumer(iname string) {
2765 	if si := mset.sources[iname]; si != nil {
2766 		mset.cancelSourceInfo(si)
2767 		si.sseq, si.dseq = 0, 0
2768 	}
2769 }
2770 
2771 // The `si` has been verified to be not nil. The sourceInfo's sub will
2772 // be unsubscribed and set to nil (if not already done) and the
2773 // cname will be reset. The message processing go routine's quit channel
2774 // will be closed if still open.
2775 //
2776 // Lock should be held.
2777 func (mset *stream) cancelSourceInfo(si *sourceInfo) {
2778 	if si.sub != nil {
2779 		mset.unsubscribe(si.sub)
2780 		si.sub = nil
2781 	}
2782 	// In case we had a mirror direct subscription.
2783 	if si.dsub != nil {
2784 		mset.unsubscribe(si.dsub)
2785 		si.dsub = nil
2786 	}
2787 	mset.removeInternalConsumer(si)
2788 	if si.qch != nil {
2789 		close(si.qch)
2790 		si.qch = nil
2791 	}
2792 	if si.msgs != nil {
2793 		si.msgs.drain()
2794 		si.msgs.unregister()
2795 	}
2796 }
2797 
2798 const sourceConsumerRetryThreshold = 2 * time.Second
2799 
2800 // This will schedule a call to setSourceConsumer, taking into account the last
2801 // time it was retried, and determines the soonest setSourceConsumer can be called
2802 // without tripping the sourceConsumerRetryThreshold.
2803 //
2804 // Lock held on entry.
2805 func (mset *stream) scheduleSetSourceConsumerRetry(si *sourceInfo, seq uint64, startTime time.Time) {
2806 	// We are trying to figure out how soon we can retry. setSourceConsumer will reject
2807 	// a retry if the last one was done less than "sourceConsumerRetryThreshold" ago.
2808 	next := sourceConsumerRetryThreshold - time.Since(si.lreq)
2809 	if next < 0 {
2810 		// It means that we have passed the threshold and so we are ready to go.
2811 		next = 0
2812 	}
2813 	// Take into account failures here.
2814 	next += calculateRetryBackoff(si.fails)
2815 
2816 	// To make *sure* that the next request will not fail, add a bit of buffer
2817 	// and some randomness.
2818 	next += time.Duration(rand.Intn(int(10*time.Millisecond))) + 10*time.Millisecond
2819 	mset.scheduleSetSourceConsumer(si.iname, seq, next, startTime)
2820 }
2821 
2822 // Simply schedules setSourceConsumer at the given delay.
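// If a timer for this index name already fired but its callback has not yet
// taken the stream lock, the existing entry is left to run rather than being
// replaced (see the Stop check below).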
2823 //
2824 // Lock held on entry.
2825 func (mset *stream) scheduleSetSourceConsumer(iname string, seq uint64, delay time.Duration, startTime time.Time) {
2826 	if mset.sourceRetries == nil {
2827 		mset.sourceRetries = map[string]*time.Timer{}
2828 	}
2829 	if t, ok := mset.sourceRetries[iname]; ok && !t.Stop() {
2830 		// It looks like the goroutine has started running but hasn't taken the
2831 		// stream lock yet (otherwise the map entry would be deleted). We might
2832 		// as well let the running goroutine complete and schedule another
2833 		// timer only if it needs to.
2834 		return
2835 	}
2836 	mset.sourceRetries[iname] = time.AfterFunc(delay, func() {
2837 		mset.mu.Lock()
2838 		defer mset.mu.Unlock()
2839 
2840 		delete(mset.sourceRetries, iname)
2841 		mset.setSourceConsumer(iname, seq, startTime)
2842 	})
2843 }
2844 
2845 // Lock should be held.
2846 func (mset *stream) setSourceConsumer(iname string, seq uint64, startTime time.Time) {
2847 	// Ignore if closed.
2848 	if mset.closed.Load() {
2849 		return
2850 	}
2851 
2852 	si := mset.sources[iname]
2853 	if si == nil {
2854 		return
2855 	}
2856 	// Cancel previous instance if applicable.
2857 	mset.cancelSourceInfo(si)
2858 
2859 	ssi := mset.streamSource(iname)
2860 	if ssi == nil {
2861 		return
2862 	}
2863 
2864 	// We want to throttle here in terms of how fast we request new consumers,
2865 	// or if the previous request is still in progress.
2866 	if last := time.Since(si.lreq); last < sourceConsumerRetryThreshold || si.sip {
2867 		mset.scheduleSetSourceConsumerRetry(si, seq, startTime)
2868 		return
2869 	}
2870 	si.lreq = time.Now()
2871 
2872 	// Determine subjects etc.
2873 	var deliverSubject string
2874 	ext := ssi.External
2875 
2876 	if ext != nil && ext.DeliverPrefix != _EMPTY_ {
2877 		deliverSubject = strings.ReplaceAll(ext.DeliverPrefix+syncSubject(".S"), "..", ".")
2878 	} else {
2879 		deliverSubject = syncSubject("$JS.S")
2880 	}
2881 
2882 	req := &CreateConsumerRequest{
2883 		Stream: si.name,
2884 		Config: ConsumerConfig{
2885 			DeliverSubject: deliverSubject,
2886 			AckPolicy:      AckNone,
2887 			AckWait:        22 * time.Hour,
2888 			MaxDeliver:     1,
2889 			Heartbeat:      sourceHealthCheckInterval,
2890 			FlowControl:    true,
2891 			Direct:         true,
2892 		},
2893 	}
2894 
2895 	// If starting, check any configs.
2896 	if !startTime.IsZero() && seq > 1 {
2897 		req.Config.OptStartTime = &startTime
2898 		req.Config.DeliverPolicy = DeliverByStartTime
2899 	} else if seq <= 1 {
2900 		if ssi.OptStartSeq > 0 {
2901 			req.Config.OptStartSeq = ssi.OptStartSeq
2902 			req.Config.DeliverPolicy = DeliverByStartSequence
2903 		} else if ssi.OptStartTime != nil {
2904 			// Check to see if our configured start is before what we remember.
2905 			// Applicable on restart, similar to below.
2906 			if ssi.OptStartTime.Before(si.start) {
2907 				req.Config.OptStartTime = &si.start
2908 			} else {
2909 				req.Config.OptStartTime = ssi.OptStartTime
2910 			}
2911 			req.Config.DeliverPolicy = DeliverByStartTime
2912 		} else if !si.start.IsZero() {
2913 			// We are falling back to time based startup on a recovery, but our messages are gone, e.g. due to purge, expiration, or retention policy.
2914 req.Config.OptStartTime = &si.start 2915 req.Config.DeliverPolicy = DeliverByStartTime 2916 } 2917 } else { 2918 req.Config.OptStartSeq = seq 2919 req.Config.DeliverPolicy = DeliverByStartSequence 2920 } 2921 // Filters 2922 if ssi.FilterSubject != _EMPTY_ { 2923 req.Config.FilterSubject = ssi.FilterSubject 2924 } 2925 2926 var filterSubjects []string 2927 for _, tr := range ssi.SubjectTransforms { 2928 filterSubjects = append(filterSubjects, tr.Source) 2929 } 2930 req.Config.FilterSubjects = filterSubjects 2931 2932 respCh := make(chan *JSApiConsumerCreateResponse, 1) 2933 reply := infoReplySubject() 2934 crSub, err := mset.subscribeInternal(reply, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { 2935 mset.unsubscribe(sub) 2936 _, msg := c.msgParts(rmsg) 2937 var ccr JSApiConsumerCreateResponse 2938 if err := json.Unmarshal(msg, &ccr); err != nil { 2939 c.Warnf("JetStream bad source consumer create response: %q", msg) 2940 return 2941 } 2942 select { 2943 case respCh <- &ccr: 2944 default: 2945 } 2946 }) 2947 if err != nil { 2948 si.err = NewJSSourceConsumerSetupFailedError(err, Unless(err)) 2949 mset.scheduleSetSourceConsumerRetry(si, seq, startTime) 2950 return 2951 } 2952 2953 var subject string 2954 if req.Config.FilterSubject != _EMPTY_ { 2955 req.Config.Name = fmt.Sprintf("src-%s", createConsumerName()) 2956 subject = fmt.Sprintf(JSApiConsumerCreateExT, si.name, req.Config.Name, req.Config.FilterSubject) 2957 } else if len(req.Config.FilterSubjects) == 1 { 2958 req.Config.Name = fmt.Sprintf("src-%s", createConsumerName()) 2959 // It is necessary to switch to using FilterSubject here as the extended consumer 2960 // create API checks for it, so as to not accidentally allow multiple filtered subjects. 2961 req.Config.FilterSubject = req.Config.FilterSubjects[0] 2962 req.Config.FilterSubjects = nil 2963 subject = fmt.Sprintf(JSApiConsumerCreateExT, si.name, req.Config.Name, req.Config.FilterSubject) 2964 } else { 2965 subject = fmt.Sprintf(JSApiConsumerCreateT, si.name) 2966 } 2967 if ext != nil { 2968 subject = strings.Replace(subject, JSApiPrefix, ext.ApiPrefix, 1) 2969 subject = strings.ReplaceAll(subject, "..", ".") 2970 } 2971 2972 // Marshal request. 2973 b, _ := json.Marshal(req) 2974 2975 // Reset 2976 si.msgs = nil 2977 si.err = nil 2978 si.sip = true 2979 2980 // Send the consumer create request 2981 mset.outq.send(newJSPubMsg(subject, _EMPTY_, reply, nil, b, nil, 0)) 2982 2983 go func() { 2984 2985 var retry bool 2986 defer func() { 2987 mset.mu.Lock() 2988 // Check that this is still valid and if so, clear the "setup in progress" flag. 2989 if si := mset.sources[iname]; si != nil { 2990 si.sip = false 2991 // If we need to retry, schedule now 2992 if retry { 2993 si.fails++ 2994 // Cancel here since we can not do anything with this consumer at this point. 2995 mset.cancelSourceInfo(si) 2996 mset.scheduleSetSourceConsumerRetry(si, seq, startTime) 2997 } else { 2998 // Clear on success. 2999 si.fails = 0 3000 } 3001 } 3002 mset.mu.Unlock() 3003 }() 3004 3005 // Wait for previous processSourceMsgs go routine to be completely done. 3006 // If none is running, this will not block. 
3007 si.wg.Wait() 3008 3009 select { 3010 case ccr := <-respCh: 3011 ready := sync.WaitGroup{} 3012 mset.mu.Lock() 3013 // Check that it has not been removed or canceled (si.sub would be nil) 3014 if si := mset.sources[iname]; si != nil { 3015 si.err = nil 3016 if ccr.Error != nil || ccr.ConsumerInfo == nil { 3017 // Note: this warning can happen a few times when starting up the server when sourcing streams are 3018 // defined, this is normal as the streams are re-created in no particular order and it is possible 3019 // that a stream sourcing another could come up before all of its sources have been recreated. 3020 mset.srv.Warnf("JetStream error response for stream %s create source consumer %s: %+v", mset.cfg.Name, si.name, ccr.Error) 3021 si.err = ccr.Error 3022 // Let's retry as soon as possible, but we are gated by sourceConsumerRetryThreshold 3023 retry = true 3024 mset.mu.Unlock() 3025 return 3026 } else { 3027 // Setup actual subscription to process messages from our source. 3028 qname := fmt.Sprintf("[ACC:%s] stream source '%s' from '%s' msgs", mset.acc.Name, mset.cfg.Name, si.name) 3029 // Create a new queue each time 3030 si.msgs = newIPQueue[*inMsg](mset.srv, qname) 3031 msgs := si.msgs 3032 sub, err := mset.subscribeInternal(deliverSubject, func(sub *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { 3033 hdr, msg := c.msgParts(copyBytes(rmsg)) // Need to copy. 3034 mset.queueInbound(msgs, subject, reply, hdr, msg, nil) 3035 }) 3036 if err != nil { 3037 si.err = NewJSSourceConsumerSetupFailedError(err, Unless(err)) 3038 retry = true 3039 mset.mu.Unlock() 3040 return 3041 } 3042 // Save our sub. 3043 si.sub = sub 3044 3045 if si.sseq != ccr.ConsumerInfo.Delivered.Stream { 3046 si.sseq = ccr.ConsumerInfo.Delivered.Stream + 1 3047 } 3048 // Capture consumer name. 3049 si.cname = ccr.ConsumerInfo.Name 3050 // Do not set si.sseq to seq here. si.sseq will be set in processInboundSourceMsg 3051 si.dseq = 0 3052 si.qch = make(chan struct{}) 3053 si.wg.Add(1) 3054 ready.Add(1) 3055 if !mset.srv.startGoRoutine( 3056 func() { mset.processSourceMsgs(si, &ready) }, 3057 pprofLabels{ 3058 "type": "source", 3059 "account": mset.acc.Name, 3060 "stream": mset.cfg.Name, 3061 "consumer": si.cname, 3062 }, 3063 ) { 3064 ready.Done() 3065 } 3066 } 3067 } 3068 mset.mu.Unlock() 3069 ready.Wait() 3070 case <-time.After(5 * time.Second): 3071 mset.unsubscribe(crSub) 3072 // We already waited 5 seconds, let's retry now. 3073 retry = true 3074 } 3075 }() 3076 } 3077 3078 func (mset *stream) processSourceMsgs(si *sourceInfo, ready *sync.WaitGroup) { 3079 s := mset.srv 3080 defer func() { 3081 si.wg.Done() 3082 s.grWG.Done() 3083 }() 3084 3085 // Grab some stream and sourceInfo values now... 3086 mset.mu.Lock() 3087 msgs, qch, siqch, iname := si.msgs, mset.qch, si.qch, si.iname 3088 // Set the last seen as now so that we don't fail at the first check. 3089 si.last = time.Now() 3090 mset.mu.Unlock() 3091 3092 // Signal the caller that we have captured the above fields. 
3093 ready.Done() 3094 3095 t := time.NewTicker(sourceHealthCheckInterval) 3096 defer t.Stop() 3097 3098 for { 3099 select { 3100 case <-s.quitCh: 3101 return 3102 case <-qch: 3103 return 3104 case <-siqch: 3105 return 3106 case <-msgs.ch: 3107 ims := msgs.pop() 3108 for _, im := range ims { 3109 if !mset.processInboundSourceMsg(si, im) { 3110 break 3111 } 3112 } 3113 msgs.recycle(&ims) 3114 case <-t.C: 3115 mset.mu.RLock() 3116 isLeader := mset.isLeader() 3117 stalled := time.Since(si.last) > 3*sourceHealthCheckInterval 3118 mset.mu.RUnlock() 3119 // No longer leader. 3120 if !isLeader { 3121 mset.mu.Lock() 3122 mset.cancelSourceConsumer(iname) 3123 mset.mu.Unlock() 3124 return 3125 } 3126 // We are stalled. 3127 if stalled { 3128 mset.mu.Lock() 3129 // We don't need to schedule here, we are going to simply 3130 // call setSourceConsumer with the current state+1. 3131 mset.setSourceConsumer(iname, si.sseq+1, time.Time{}) 3132 mset.mu.Unlock() 3133 } 3134 } 3135 } 3136 } 3137 3138 // isControlMsg determines if this is a control message. 3139 func (m *inMsg) isControlMsg() bool { 3140 return len(m.msg) == 0 && len(m.hdr) > 0 && bytes.HasPrefix(m.hdr, []byte("NATS/1.0 100 ")) 3141 } 3142 3143 // Sends a reply to a flow control request. 3144 func (mset *stream) sendFlowControlReply(reply string) { 3145 mset.mu.RLock() 3146 if mset.isLeader() && mset.outq != nil { 3147 mset.outq.sendMsg(reply, nil) 3148 } 3149 mset.mu.RUnlock() 3150 } 3151 3152 // handleFlowControl will properly handle flow control messages for both R==1 and R>1. 3153 // Lock should be held. 3154 func (mset *stream) handleFlowControl(si *sourceInfo, m *inMsg) { 3155 // If we are clustered we will send the flow control message through the replication stack. 3156 if mset.isClustered() { 3157 mset.node.Propose(encodeStreamMsg(_EMPTY_, m.rply, m.hdr, nil, 0, 0)) 3158 } else { 3159 mset.outq.sendMsg(m.rply, nil) 3160 } 3161 } 3162 3163 // processInboundSourceMsg handles processing other stream messages bound for this stream. 3164 func (mset *stream) processInboundSourceMsg(si *sourceInfo, m *inMsg) bool { 3165 mset.mu.Lock() 3166 3167 // If we are no longer the leader cancel this subscriber. 3168 if !mset.isLeader() { 3169 mset.cancelSourceConsumer(si.iname) 3170 mset.mu.Unlock() 3171 return false 3172 } 3173 3174 isControl := m.isControlMsg() 3175 3176 // Ignore from old subscriptions. 3177 if !si.isCurrentSub(m.rply) && !isControl { 3178 mset.mu.Unlock() 3179 return false 3180 } 3181 3182 si.last = time.Now() 3183 node := mset.node 3184 3185 // Check for heartbeats and flow control messages. 3186 if isControl { 3187 var needsRetry bool 3188 // Flow controls have reply subjects. 3189 if m.rply != _EMPTY_ { 3190 mset.handleFlowControl(si, m) 3191 } else { 3192 // For idle heartbeats make sure we did not miss anything. 3193 if ldseq := parseInt64(getHeader(JSLastConsumerSeq, m.hdr)); ldseq > 0 && uint64(ldseq) != si.dseq { 3194 needsRetry = true 3195 mset.retrySourceConsumerAtSeq(si.iname, si.sseq+1) 3196 } else if fcReply := getHeader(JSConsumerStalled, m.hdr); len(fcReply) > 0 { 3197 // Other side thinks we are stalled, so send flow control reply. 3198 mset.outq.sendMsg(string(fcReply), nil) 3199 } 3200 } 3201 mset.mu.Unlock() 3202 return !needsRetry 3203 } 3204 3205 sseq, dseq, dc, _, pending := replyInfo(m.rply) 3206 3207 if dc > 1 { 3208 mset.mu.Unlock() 3209 return false 3210 } 3211 3212 // Tracking is done here. 
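	// The cases below: the expected next delivery advances our tracking; a
	// jump ahead with no consumer name yet means a new consumer that we adopt;
	// a jump ahead on a known consumer triggers a retry from our last known
	// sequence; anything older is dropped.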
3213 	if dseq == si.dseq+1 {
3214 		si.dseq++
3215 		si.sseq = sseq
3216 	} else if dseq > si.dseq {
3217 		if si.cname == _EMPTY_ {
3218 			si.cname = tokenAt(m.rply, 4)
3219 			si.dseq, si.sseq = dseq, sseq
3220 		} else {
3221 			mset.retrySourceConsumerAtSeq(si.iname, si.sseq+1)
3222 			mset.mu.Unlock()
3223 			return false
3224 		}
3225 	} else {
3226 		mset.mu.Unlock()
3227 		return false
3228 	}
3229 
3230 	if pending == 0 {
3231 		si.lag = 0
3232 	} else {
3233 		si.lag = pending - 1
3234 	}
3235 	mset.mu.Unlock()
3236 
3237 	hdr, msg := m.hdr, m.msg
3238 
3239 	// If we are daisy chained here, make sure to remove the original one.
3240 	if len(hdr) > 0 {
3241 		hdr = removeHeaderIfPresent(hdr, JSStreamSource)
3242 	}
3243 	// Hold onto the origin reply which has all the metadata.
3244 	hdr = genHeader(hdr, JSStreamSource, si.genSourceHeader(m.rply))
3245 
3246 	// Do the subject transform for the source if there's one.
3247 
3248 	for _, tr := range si.trs {
3249 		if tr == nil {
3250 			continue
3251 		} else {
3252 			tsubj, err := tr.Match(m.subj)
3253 			if err == nil {
3254 				m.subj = tsubj
3255 				break
3256 			}
3257 		}
3258 	}
3259 
3260 	var err error
3261 	// If we are clustered we need to propose this message to the underlying raft group.
3262 	if node != nil {
3263 		err = mset.processClusteredInboundMsg(m.subj, _EMPTY_, hdr, msg, nil)
3264 	} else {
3265 		err = mset.processJetStreamMsg(m.subj, _EMPTY_, hdr, msg, 0, 0, nil)
3266 	}
3267 
3268 	if err != nil {
3269 		s := mset.srv
3270 		if strings.Contains(err.Error(), "no space left") {
3271 			s.Errorf("JetStream out of space, will be DISABLED")
3272 			s.DisableJetStream()
3273 		} else {
3274 			mset.mu.RLock()
3275 			accName, sname, iname := mset.acc.Name, mset.cfg.Name, si.iname
3276 			mset.mu.RUnlock()
3277 			// Log a warning for errors other than errLastSeqMismatch.
3278 			if err != errLastSeqMismatch {
3279 				s.RateLimitWarnf("Error processing inbound source %q for '%s' > '%s': %v",
3280 					iname, accName, sname, err)
3281 			}
3282 			// Retry on all types of errors.
3283 			// This will make sure the source is still in the mset.sources map,
3284 			// find the last sequence and then call setSourceConsumer.
3285 			mset.retrySourceConsumer(iname)
3286 		}
3287 		return false
3288 	}
3289 
3290 	return true
3291 }
3292 
3293 // Generate a new (2.10) style source header (stream name, sequence number, source filter, source destination transform).
3294 func (si *sourceInfo) genSourceHeader(reply string) string {
3295 	var b strings.Builder
3296 	iNameParts := strings.Split(si.iname, " ")
3297 
3298 	b.WriteString(iNameParts[0])
3299 	b.WriteByte(' ')
3300 	// Grab sequence as text here from reply subject.
3301 	var tsa [expectedNumReplyTokens]string
3302 	start, tokens := 0, tsa[:0]
3303 	for i := 0; i < len(reply); i++ {
3304 		if reply[i] == btsep {
3305 			tokens, start = append(tokens, reply[start:i]), i+1
3306 		}
3307 	}
3308 	tokens = append(tokens, reply[start:])
3309 	seq := "1" // Default.
3310 	if len(tokens) == expectedNumReplyTokens && tokens[0] == "$JS" && tokens[1] == "ACK" {
3311 		seq = tokens[5]
3312 	}
3313 	b.WriteString(seq)
3314 
3315 	b.WriteByte(' ')
3316 	b.WriteString(iNameParts[1])
3317 	b.WriteByte(' ')
3318 	b.WriteString(iNameParts[2])
3319 	return b.String()
3320 }
3321 
3322 // Original version of the header that stored the ack reply directly.
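// That is, the full "$JS.ACK.<stream>.<consumer>.<delivered>.<sseq>.<dseq>.<ts>.<pending>"
// subject: only the stream name (token 3) and the stream sequence (token 6)
// are recovered from it.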
3323 func streamAndSeqFromAckReply(reply string) (string, string, uint64) { 3324 tsa := [expectedNumReplyTokens]string{} 3325 start, tokens := 0, tsa[:0] 3326 for i := 0; i < len(reply); i++ { 3327 if reply[i] == btsep { 3328 tokens, start = append(tokens, reply[start:i]), i+1 3329 } 3330 } 3331 tokens = append(tokens, reply[start:]) 3332 if len(tokens) != expectedNumReplyTokens || tokens[0] != "$JS" || tokens[1] != "ACK" { 3333 return _EMPTY_, _EMPTY_, 0 3334 } 3335 return tokens[2], _EMPTY_, uint64(parseAckReplyNum(tokens[5])) 3336 } 3337 3338 // Extract the stream name, the source index name and the message sequence number from the source header. 3339 // Uses the filter and transform arguments to provide backwards compatibility 3340 func streamAndSeq(shdr string) (string, string, uint64) { 3341 if strings.HasPrefix(shdr, jsAckPre) { 3342 return streamAndSeqFromAckReply(shdr) 3343 } 3344 // New version which is stream index name <SPC> sequence 3345 fields := strings.Split(shdr, " ") 3346 nFields := len(fields) 3347 3348 if nFields != 2 && nFields <= 3 { 3349 return _EMPTY_, _EMPTY_, 0 3350 } 3351 3352 if nFields >= 4 { 3353 return fields[0], strings.Join([]string{fields[0], fields[2], fields[3]}, " "), uint64(parseAckReplyNum(fields[1])) 3354 } else { 3355 return fields[0], _EMPTY_, uint64(parseAckReplyNum(fields[1])) 3356 } 3357 3358 } 3359 3360 // Lock should be held. 3361 func (mset *stream) setStartingSequenceForSources(iNames map[string]struct{}) { 3362 var state StreamState 3363 mset.store.FastState(&state) 3364 3365 // Do not reset sseq here so we can remember when purge/expiration happens. 3366 if state.Msgs == 0 { 3367 for iName := range iNames { 3368 si := mset.sources[iName] 3369 if si == nil { 3370 continue 3371 } else { 3372 si.dseq = 0 3373 } 3374 } 3375 return 3376 } 3377 3378 var smv StoreMsg 3379 for seq := state.LastSeq; seq >= state.FirstSeq; seq-- { 3380 sm, err := mset.store.LoadMsg(seq, &smv) 3381 if err != nil || len(sm.hdr) == 0 { 3382 continue 3383 } 3384 ss := getHeader(JSStreamSource, sm.hdr) 3385 if len(ss) == 0 { 3386 continue 3387 } 3388 streamName, indexName, sseq := streamAndSeq(string(ss)) 3389 3390 if _, ok := iNames[indexName]; ok { 3391 si := mset.sources[indexName] 3392 si.sseq = sseq 3393 si.dseq = 0 3394 delete(iNames, indexName) 3395 } else if indexName == _EMPTY_ && streamName != _EMPTY_ { 3396 for iName := range iNames { 3397 // TODO streamSource is a linear walk, to optimize later 3398 if si := mset.sources[iName]; si != nil && streamName == si.name || 3399 (mset.streamSource(iName).External != nil && streamName == si.name+":"+getHash(mset.streamSource(iName).External.ApiPrefix)) { 3400 si.sseq = sseq 3401 si.dseq = 0 3402 delete(iNames, iName) 3403 break 3404 } 3405 } 3406 } 3407 if len(iNames) == 0 { 3408 break 3409 } 3410 } 3411 } 3412 3413 // lock should be held. 
3414 // Resets the sourceInfo for all the sources.
3415 func (mset *stream) resetSourceInfo() {
3416 	mset.sources = make(map[string]*sourceInfo)
3417 
3418 	for _, ssi := range mset.cfg.Sources {
3419 		if ssi.iname == _EMPTY_ {
3420 			ssi.setIndexName()
3421 		}
3422 
3423 		var si *sourceInfo
3424 
3425 		if len(ssi.SubjectTransforms) == 0 {
3426 			si = &sourceInfo{name: ssi.Name, iname: ssi.iname, sf: ssi.FilterSubject}
3427 		} else {
3428 			sfs := make([]string, len(ssi.SubjectTransforms))
3429 			trs := make([]*subjectTransform, len(ssi.SubjectTransforms))
3430 			for i, str := range ssi.SubjectTransforms {
3431 				tr, err := NewSubjectTransform(str.Source, str.Destination)
3432 				if err != nil {
3433 					mset.srv.Errorf("Unable to get subject transform for source: %v", err)
3434 				}
3435 				sfs[i] = str.Source
3436 				trs[i] = tr
3437 			}
3438 			si = &sourceInfo{name: ssi.Name, iname: ssi.iname, sfs: sfs, trs: trs}
3439 		}
3440 		mset.sources[ssi.iname] = si
3441 	}
3442 }
3443 
3444 // This will do a reverse scan on startup or leader election
3445 // searching for the starting sequence number.
3446 // This can be slow in degenerate cases.
3447 //
3448 // Lock should be held.
3449 func (mset *stream) startingSequenceForSources() {
3450 	if len(mset.cfg.Sources) == 0 {
3451 		return
3452 	}
3453 
3454 	// Always reset here.
3455 	mset.resetSourceInfo()
3456 
3457 	var state StreamState
3458 	mset.store.FastState(&state)
3459 
3460 	// If the last time has been stamped, remember it in case we need to fall back to this for any given upstream source.
3461 	// TODO(dlc) - This will be ok, but should formalize with new approach and more formal and durable state.
3462 	if !state.LastTime.IsZero() {
3463 		for _, si := range mset.sources {
3464 			si.start = state.LastTime
3465 		}
3466 	}
3467 	// Bail if no messages, meaning no context.
3468 	if state.Msgs == 0 {
3469 		return
3470 	}
3471 
3472 	// For short circuiting return.
3473 	expected := len(mset.cfg.Sources)
3474 	seqs := make(map[string]uint64)
3475 
3476 	// Stamp our si seq records on the way out.
3477 	defer func() {
3478 		for sname, seq := range seqs {
3479 			// Ignore if not set.
3480 			if seq == 0 {
3481 				continue
3482 			}
3483 			if si := mset.sources[sname]; si != nil {
3484 				si.sseq = seq
3485 				si.dseq = 0
3486 			}
3487 		}
3488 	}()
3489 
3490 	var smv StoreMsg
3491 	for seq := state.LastSeq; seq >= state.FirstSeq; seq-- {
3492 		sm, err := mset.store.LoadMsg(seq, &smv)
3493 		if err != nil || sm == nil || len(sm.hdr) == 0 {
3494 			continue
3495 		}
3496 		ss := getHeader(JSStreamSource, sm.hdr)
3497 		if len(ss) == 0 {
3498 			continue
3499 		}
3500 
3501 		var update = func(iName string, seq uint64) {
3502 			// Only update active sources in case we have older ones in here that got configured out.
3503 			if si := mset.sources[iName]; si != nil {
3504 				if _, ok := seqs[iName]; !ok {
3505 					seqs[iName] = seq
3506 				}
3507 			}
3508 		}
3509 
3510 		streamName, iName, sSeq := streamAndSeq(string(ss))
3511 		if iName == _EMPTY_ { // A pre-2.10 message header means it's a match for any source using that stream name.
3512 			for _, ssi := range mset.cfg.Sources {
3513 				if streamName == ssi.Name || (ssi.External != nil && streamName == ssi.Name+":"+getHash(ssi.External.ApiPrefix)) {
3514 					update(ssi.iname, sSeq)
3515 				}
3516 			}
3517 		} else {
3518 			update(iName, sSeq)
3519 		}
3520 		if len(seqs) == expected {
3521 			return
3522 		}
3523 	}
3524 }
3525 
3526 // Setup our source consumers.
3527 // Lock should be held.
3528 func (mset *stream) setupSourceConsumers() error {
3529 	if mset.outq == nil {
3530 		return errors.New("outq required")
3531 	}
3532 	// Reset if needed.
3533 for _, si := range mset.sources { 3534 if si.sub != nil { 3535 mset.cancelSourceConsumer(si.iname) 3536 } 3537 } 3538 3539 // If we are no longer the leader, give up 3540 if !mset.isLeader() { 3541 return nil 3542 } 3543 3544 mset.startingSequenceForSources() 3545 3546 // Setup our consumers at the proper starting position. 3547 for _, ssi := range mset.cfg.Sources { 3548 if si := mset.sources[ssi.iname]; si != nil { 3549 mset.setSourceConsumer(ssi.iname, si.sseq+1, time.Time{}) 3550 } 3551 } 3552 3553 return nil 3554 } 3555 3556 // Will create internal subscriptions for the stream. 3557 // Lock should be held. 3558 func (mset *stream) subscribeToStream() error { 3559 if mset.active { 3560 return nil 3561 } 3562 for _, subject := range mset.cfg.Subjects { 3563 if _, err := mset.subscribeInternal(subject, mset.processInboundJetStreamMsg); err != nil { 3564 return err 3565 } 3566 } 3567 // Check if we need to setup mirroring. 3568 if mset.cfg.Mirror != nil { 3569 // setup the initial mirror sourceInfo 3570 mset.mirror = &sourceInfo{name: mset.cfg.Mirror.Name} 3571 sfs := make([]string, len(mset.cfg.Mirror.SubjectTransforms)) 3572 trs := make([]*subjectTransform, len(mset.cfg.Mirror.SubjectTransforms)) 3573 3574 for i, tr := range mset.cfg.Mirror.SubjectTransforms { 3575 // will not fail as already checked before that the transform will work 3576 subjectTransform, err := NewSubjectTransform(tr.Source, tr.Destination) 3577 if err != nil { 3578 mset.srv.Errorf("Unable to get transform for mirror consumer: %v", err) 3579 } 3580 3581 sfs[i] = tr.Source 3582 trs[i] = subjectTransform 3583 } 3584 mset.mirror.sfs = sfs 3585 mset.mirror.trs = trs 3586 // delay the actual mirror consumer creation for after a delay 3587 mset.scheduleSetupMirrorConsumerRetry() 3588 } else if len(mset.cfg.Sources) > 0 { 3589 // Setup the initial source infos for the sources 3590 mset.resetSourceInfo() 3591 // Delay the actual source consumer(s) creation(s) for after a delay 3592 3593 mset.sourcesConsumerSetup = time.AfterFunc(time.Duration(rand.Intn(int(10*time.Millisecond)))+10*time.Millisecond, func() { 3594 mset.mu.Lock() 3595 mset.setupSourceConsumers() 3596 mset.mu.Unlock() 3597 }) 3598 } 3599 // Check for direct get access. 3600 // We spin up followers for clustered streams in monitorStream(). 3601 if mset.cfg.AllowDirect { 3602 if err := mset.subscribeToDirect(); err != nil { 3603 return err 3604 } 3605 } 3606 3607 mset.active = true 3608 return nil 3609 } 3610 3611 // Lock should be held. 3612 func (mset *stream) subscribeToDirect() error { 3613 // We will make this listen on a queue group by default, which can allow mirrors to participate on opt-in basis. 3614 if mset.directSub == nil { 3615 dsubj := fmt.Sprintf(JSDirectMsgGetT, mset.cfg.Name) 3616 if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetRequest); err == nil { 3617 mset.directSub = sub 3618 } else { 3619 return err 3620 } 3621 } 3622 // Now the one that will have subject appended past stream name. 3623 if mset.lastBySub == nil { 3624 dsubj := fmt.Sprintf(JSDirectGetLastBySubjectT, mset.cfg.Name, fwcs) 3625 // We will make this listen on a queue group by default, which can allow mirrors to participate on opt-in basis. 3626 if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetLastBySubjectRequest); err == nil { 3627 mset.lastBySub = sub 3628 } else { 3629 return err 3630 } 3631 } 3632 3633 return nil 3634 } 3635 3636 // Lock should be held. 
3637 func (mset *stream) unsubscribeToDirect() {
3638 	if mset.directSub != nil {
3639 		mset.unsubscribe(mset.directSub)
3640 		mset.directSub = nil
3641 	}
3642 	if mset.lastBySub != nil {
3643 		mset.unsubscribe(mset.lastBySub)
3644 		mset.lastBySub = nil
3645 	}
3646 }
3647 
3648 // Lock should be held.
3649 func (mset *stream) subscribeToMirrorDirect() error {
3650 	if mset.mirror == nil {
3651 		return nil
3652 	}
3653 
3654 	// We will make this listen on a queue group by default, which can allow mirrors to participate on an opt-in basis.
3655 	if mset.mirror.dsub == nil {
3656 		dsubj := fmt.Sprintf(JSDirectMsgGetT, mset.mirror.name)
3657 		// This joins the same queue group as the origin stream's direct subscription, so requests are load balanced across participants.
3658 		if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetRequest); err == nil {
3659 			mset.mirror.dsub = sub
3660 		} else {
3661 			return err
3662 		}
3663 	}
3664 	// Now the one that will have the subject appended past the stream name.
3665 	if mset.mirror.lbsub == nil {
3666 		dsubj := fmt.Sprintf(JSDirectGetLastBySubjectT, mset.mirror.name, fwcs)
3667 		// Same queue group as above, so mirrors can also participate on an opt-in basis.
3668 		if sub, err := mset.queueSubscribeInternal(dsubj, dgetGroup, mset.processDirectGetLastBySubjectRequest); err == nil {
3669 			mset.mirror.lbsub = sub
3670 		} else {
3671 			return err
3672 		}
3673 	}
3674 
3675 	return nil
3676 }
3677 
3678 // Stop our source consumers.
3679 // Lock should be held.
3680 func (mset *stream) stopSourceConsumers() {
3681 	for _, si := range mset.sources {
3682 		mset.cancelSourceInfo(si)
3683 	}
3684 }
3685 
3686 // Lock should be held.
3687 func (mset *stream) removeInternalConsumer(si *sourceInfo) {
3688 	if si == nil || si.cname == _EMPTY_ {
3689 		return
3690 	}
3691 	si.cname = _EMPTY_
3692 }
3693 
3694 // Will unsubscribe from the stream.
3695 // Lock should be held.
3696 func (mset *stream) unsubscribeToStream(stopping bool) error {
3697 	for _, subject := range mset.cfg.Subjects {
3698 		mset.unsubscribeInternal(subject)
3699 	}
3700 	if mset.mirror != nil {
3701 		mset.cancelSourceInfo(mset.mirror)
3702 		mset.mirror = nil
3703 	}
3704 
3705 	if len(mset.sources) > 0 {
3706 		mset.stopSourceConsumers()
3707 	}
3708 
3709 	// In case we had direct get subscriptions.
3710 	if stopping {
3711 		mset.unsubscribeToDirect()
3712 	}
3713 
3714 	mset.active = false
3715 	return nil
3716 }
3717 
3718 // Lock does NOT need to be held, we set the client on setup and never change it at this point.
3719 func (mset *stream) subscribeInternal(subject string, cb msgHandler) (*subscription, error) {
3720 	if mset.closed.Load() {
3721 		return nil, errStreamClosed
3722 	}
3723 	if cb == nil {
3724 		return nil, errInvalidMsgHandler
3725 	}
3726 	c := mset.client
3727 	sid := int(mset.sid.Add(1))
3728 	// Now create the subscription
3729 	return c.processSub([]byte(subject), nil, []byte(strconv.Itoa(sid)), cb, false)
3730 }
3731 
3732 // Lock does NOT need to be held, we set the client on setup and never change it at this point.
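// Internal subscriptions draw their sid from the stream's atomic counter
// (mset.sid.Add(1)), so ids stay unique without taking the stream lock;
// e.g. the first two internal subs on a stream get sids "1" and "2".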
3733 func (mset *stream) queueSubscribeInternal(subject, group string, cb msgHandler) (*subscription, error) { 3734 if mset.closed.Load() { 3735 return nil, errStreamClosed 3736 } 3737 if cb == nil { 3738 return nil, errInvalidMsgHandler 3739 } 3740 c := mset.client 3741 sid := int(mset.sid.Add(1)) 3742 // Now create the subscription 3743 return c.processSub([]byte(subject), []byte(group), []byte(strconv.Itoa(sid)), cb, false) 3744 } 3745 3746 // This will unsubscribe us from the exact subject given. 3747 // We do not currently track the subs so do not have the sid. 3748 // This should be called only on an update. 3749 // Lock does NOT need to be held, we set the client on setup and never change it at this point. 3750 func (mset *stream) unsubscribeInternal(subject string) error { 3751 if mset.closed.Load() { 3752 return errStreamClosed 3753 } 3754 c := mset.client 3755 var sid []byte 3756 c.mu.Lock() 3757 for _, sub := range c.subs { 3758 if subject == string(sub.subject) { 3759 sid = sub.sid 3760 break 3761 } 3762 } 3763 c.mu.Unlock() 3764 3765 if sid != nil { 3766 return c.processUnsub(sid) 3767 } 3768 return nil 3769 } 3770 3771 // Lock should be held. 3772 func (mset *stream) unsubscribe(sub *subscription) { 3773 if sub == nil || mset.closed.Load() { 3774 return 3775 } 3776 mset.client.processUnsub(sub.sid) 3777 } 3778 3779 func (mset *stream) setupStore(fsCfg *FileStoreConfig) error { 3780 mset.mu.Lock() 3781 mset.created = time.Now().UTC() 3782 3783 switch mset.cfg.Storage { 3784 case MemoryStorage: 3785 ms, err := newMemStore(&mset.cfg) 3786 if err != nil { 3787 mset.mu.Unlock() 3788 return err 3789 } 3790 mset.store = ms 3791 case FileStorage: 3792 s := mset.srv 3793 prf := s.jsKeyGen(s.getOpts().JetStreamKey, mset.acc.Name) 3794 if prf != nil { 3795 // We are encrypted here, fill in correct cipher selection. 3796 fsCfg.Cipher = s.getOpts().JetStreamCipher 3797 } 3798 oldprf := s.jsKeyGen(s.getOpts().JetStreamOldKey, mset.acc.Name) 3799 cfg := *fsCfg 3800 cfg.srv = s 3801 fs, err := newFileStoreWithCreated(cfg, mset.cfg, mset.created, prf, oldprf) 3802 if err != nil { 3803 mset.mu.Unlock() 3804 return err 3805 } 3806 mset.store = fs 3807 } 3808 // This will fire the callback but we do not require the lock since md will be 0 here. 3809 mset.store.RegisterStorageUpdates(mset.storeUpdates) 3810 mset.mu.Unlock() 3811 3812 return nil 3813 } 3814 3815 // Called for any updates to the underlying stream. We pass through the bytes to the 3816 // jetstream account. We do local processing for stream pending for consumers, but only 3817 // for removals. 3818 // Lock should not be held. 3819 func (mset *stream) storeUpdates(md, bd int64, seq uint64, subj string) { 3820 // If we have a single negative update then we will process our consumers for stream pending. 3821 // Purge and Store handled separately inside individual calls. 3822 if md == -1 && seq > 0 && subj != _EMPTY_ { 3823 // We use our consumer list mutex here instead of the main stream lock since it may be held already. 3824 mset.clsMu.RLock() 3825 // TODO(dlc) - Do sublist like signaling so we do not have to match? 3826 for _, o := range mset.cList { 3827 o.decStreamPending(seq, subj) 3828 } 3829 mset.clsMu.RUnlock() 3830 } else if md < 0 { 3831 // Batch decrements we need to force consumers to re-calculate num pending. 
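		// For reference, md/bd are the message and byte count deltas reported
		// by the store. A single interior remove arrives as md == -1 with its
		// seq and subject and is handled above; other negative updates (e.g.
		// from a purge) land here and force each consumer to recalculate num
		// pending.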
3832 		mset.clsMu.RLock()
3833 		for _, o := range mset.cList {
3834 			o.streamNumPendingLocked()
3835 		}
3836 		mset.clsMu.RUnlock()
3837 	}
3838 
3839 	if mset.jsa != nil {
3840 		mset.jsa.updateUsage(mset.tier, mset.stype, bd)
3841 	}
3842 }
3843 
3844 // NumMsgIds returns the number of message ids being tracked for duplicate suppression.
3845 func (mset *stream) numMsgIds() int {
3846 	mset.mu.Lock()
3847 	defer mset.mu.Unlock()
3848 	if !mset.ddloaded {
3849 		mset.rebuildDedupe()
3850 	}
3851 	return len(mset.ddmap)
3852 }
3853 
3854 // checkMsgId will process and check for duplicates.
3855 // Lock should be held.
3856 func (mset *stream) checkMsgId(id string) *ddentry {
3857 	if !mset.ddloaded {
3858 		mset.rebuildDedupe()
3859 	}
3860 	if id == _EMPTY_ || len(mset.ddmap) == 0 {
3861 		return nil
3862 	}
3863 	return mset.ddmap[id]
3864 }
3865 
3866 // Will purge the entries that are past the window.
3867 // Should be called from a timer.
3868 func (mset *stream) purgeMsgIds() {
3869 	mset.mu.Lock()
3870 	defer mset.mu.Unlock()
3871 
3872 	now := time.Now().UnixNano()
3873 	tmrNext := mset.cfg.Duplicates
3874 	window := int64(tmrNext)
3875 
3876 	for i, dde := range mset.ddarr[mset.ddindex:] {
3877 		if now-dde.ts >= window {
3878 			delete(mset.ddmap, dde.id)
3879 		} else {
3880 			mset.ddindex += i
3881 			// Garbage collect if the live entries are under 1/3 of the array capacity.
3882 			if cap(mset.ddarr) > 3*(len(mset.ddarr)-mset.ddindex) {
3883 				mset.ddarr = append([]*ddentry(nil), mset.ddarr[mset.ddindex:]...)
3884 				mset.ddindex = 0
3885 			}
3886 			tmrNext = time.Duration(window - (now - dde.ts))
3887 			break
3888 		}
3889 	}
3890 	if len(mset.ddmap) > 0 {
3891 		// Make sure to not fire too quickly.
3892 		const minFire = 50 * time.Millisecond
3893 		if tmrNext < minFire {
3894 			tmrNext = minFire
3895 		}
3896 		if mset.ddtmr != nil {
3897 			mset.ddtmr.Reset(tmrNext)
3898 		} else {
3899 			mset.ddtmr = time.AfterFunc(tmrNext, mset.purgeMsgIds)
3900 		}
3901 	} else {
3902 		if mset.ddtmr != nil {
3903 			mset.ddtmr.Stop()
3904 			mset.ddtmr = nil
3905 		}
3906 		mset.ddmap = nil
3907 		mset.ddarr = nil
3908 		mset.ddindex = 0
3909 	}
3910 }
3911 
3912 // storeMsgId will store the message id for duplicate detection.
3913 func (mset *stream) storeMsgId(dde *ddentry) {
3914 	mset.mu.Lock()
3915 	defer mset.mu.Unlock()
3916 	mset.storeMsgIdLocked(dde)
3917 }
3918 
3919 // storeMsgIdLocked will store the message id for duplicate detection.
3920 // Lock should be held.
3921 func (mset *stream) storeMsgIdLocked(dde *ddentry) {
3922 	if mset.ddmap == nil {
3923 		mset.ddmap = make(map[string]*ddentry)
3924 	}
3925 	mset.ddmap[dde.id] = dde
3926 	mset.ddarr = append(mset.ddarr, dde)
3927 	if mset.ddtmr == nil {
3928 		mset.ddtmr = time.AfterFunc(mset.cfg.Duplicates, mset.purgeMsgIds)
3929 	}
3930 }
3931 
3932 // Fast lookup of msgId.
3933 func getMsgId(hdr []byte) string {
3934 	return string(getHeader(JSMsgId, hdr))
3935 }
3936 
3937 // Fast lookup of expected last msgId.
3938 func getExpectedLastMsgId(hdr []byte) string {
3939 	return string(getHeader(JSExpectedLastMsgId, hdr))
3940 }
3941 
3942 // Fast lookup of expected stream.
3943 func getExpectedStream(hdr []byte) string {
3944 	return string(getHeader(JSExpectedStream, hdr))
3945 }
3946 
3947 // Fast lookup of expected last sequence.
3948 func getExpectedLastSeq(hdr []byte) (uint64, bool) {
3949 	bseq := getHeader(JSExpectedLastSeq, hdr)
3950 	if len(bseq) == 0 {
3951 		return 0, false
3952 	}
3953 	return uint64(parseInt64(bseq)), true
3954 }
3955 
3956 // Fast lookup of rollups.
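// The value is matched in processJetStreamMsg against JSMsgRollupSubject
// ("sub") and JSMsgRollupAll ("all"). An illustrative client-side publish
// (hypothetical sketch) that collapses a subject down to a single message:
//
//	m := nats.NewMsg("kv.config")
//	m.Header.Set("Nats-Rollup", "sub") // JSMsgRollup / JSMsgRollupSubject
//	js.PublishMsg(m)
//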
3957 func getRollup(hdr []byte) string { 3958 r := getHeader(JSMsgRollup, hdr) 3959 if len(r) == 0 { 3960 return _EMPTY_ 3961 } 3962 return strings.ToLower(string(r)) 3963 } 3964 3965 // Fast lookup of expected stream sequence per subject. 3966 func getExpectedLastSeqPerSubject(hdr []byte) (uint64, bool) { 3967 bseq := getHeader(JSExpectedLastSubjSeq, hdr) 3968 if len(bseq) == 0 { 3969 return 0, false 3970 } 3971 return uint64(parseInt64(bseq)), true 3972 } 3973 3974 // Signal if we are clustered. Will acquire rlock. 3975 func (mset *stream) IsClustered() bool { 3976 mset.mu.RLock() 3977 defer mset.mu.RUnlock() 3978 return mset.isClustered() 3979 } 3980 3981 // Lock should be held. 3982 func (mset *stream) isClustered() bool { 3983 return mset.node != nil 3984 } 3985 3986 // Used if we have to queue things internally to avoid the route/gw path. 3987 type inMsg struct { 3988 subj string 3989 rply string 3990 hdr []byte 3991 msg []byte 3992 mt *msgTrace 3993 } 3994 3995 func (mset *stream) queueInbound(ib *ipQueue[*inMsg], subj, rply string, hdr, msg []byte, mt *msgTrace) { 3996 ib.push(&inMsg{subj, rply, hdr, msg, mt}) 3997 } 3998 3999 var dgPool = sync.Pool{ 4000 New: func() interface{} { 4001 return &directGetReq{} 4002 }, 4003 } 4004 4005 // For when we need to not inline the request. 4006 type directGetReq struct { 4007 // Copy of this is correct for this. 4008 req JSApiMsgGetRequest 4009 reply string 4010 } 4011 4012 // processDirectGetRequest handles direct get request for stream messages. 4013 func (mset *stream) processDirectGetRequest(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) { 4014 if len(reply) == 0 { 4015 return 4016 } 4017 _, msg := c.msgParts(rmsg) 4018 if len(msg) == 0 { 4019 hdr := []byte("NATS/1.0 408 Empty Request\r\n\r\n") 4020 mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0)) 4021 return 4022 } 4023 var req JSApiMsgGetRequest 4024 err := json.Unmarshal(msg, &req) 4025 if err != nil { 4026 hdr := []byte("NATS/1.0 408 Malformed Request\r\n\r\n") 4027 mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0)) 4028 return 4029 } 4030 // Check if nothing set. 4031 if req.Seq == 0 && req.LastFor == _EMPTY_ && req.NextFor == _EMPTY_ && len(req.MultiLastFor) == 0 { 4032 hdr := []byte("NATS/1.0 408 Empty Request\r\n\r\n") 4033 mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0)) 4034 return 4035 } 4036 // Check that we do not have both options set. 4037 // We do not allow batch mode for lastFor requests. 4038 if (req.Seq > 0 && req.LastFor != _EMPTY_) || 4039 (req.LastFor != _EMPTY_ && req.NextFor != _EMPTY_) || 4040 (req.LastFor != _EMPTY_ && req.Batch > 0) || 4041 (req.LastFor != _EMPTY_ && len(req.MultiLastFor) > 0) || 4042 (req.NextFor != _EMPTY_ && len(req.MultiLastFor) > 0) || 4043 (req.UpToSeq > 0 && req.UpToTime != nil) { 4044 hdr := []byte("NATS/1.0 408 Bad Request\r\n\r\n") 4045 mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0)) 4046 return 4047 } 4048 4049 inlineOk := c.kind != ROUTER && c.kind != GATEWAY && c.kind != LEAF 4050 if !inlineOk { 4051 dg := dgPool.Get().(*directGetReq) 4052 dg.req, dg.reply = req, reply 4053 mset.gets.push(dg) 4054 } else { 4055 mset.getDirectRequest(&req, reply) 4056 } 4057 } 4058 4059 // This is for direct get by last subject which is part of the subject itself. 
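// E.g. with a hypothetical stream ORDERS, a request on
// "$JS.API.DIRECT.GET.ORDERS.orders.new.22" yields the key "orders.new.22";
// the parse below skips the first five tokens and takes the rest verbatim.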
4060 func (mset *stream) processDirectGetLastBySubjectRequest(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
4061 	if len(reply) == 0 {
4062 		return
4063 	}
4064 	_, msg := c.msgParts(rmsg)
4065 	// This version expects no payload.
4066 	if len(msg) != 0 {
4067 		hdr := []byte("NATS/1.0 408 Bad Request\r\n\r\n")
4068 		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4069 		return
4070 	}
4071 	// Extract the key.
4072 	var key string
4073 	for i, n := 0, 0; i < len(subject); i++ {
4074 		if subject[i] == btsep {
4075 			if n == 4 {
4076 				if start := i + 1; start < len(subject) {
4077 					key = subject[i+1:]
4078 				}
4079 				break
4080 			}
4081 			n++
4082 		}
4083 	}
4084 	if len(key) == 0 {
4085 		hdr := []byte("NATS/1.0 408 Bad Request\r\n\r\n")
4086 		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4087 		return
4088 	}
4089 
4090 	req := JSApiMsgGetRequest{LastFor: key}
4091 
4092 	inlineOk := c.kind != ROUTER && c.kind != GATEWAY && c.kind != LEAF
4093 	if !inlineOk {
4094 		dg := dgPool.Get().(*directGetReq)
4095 		dg.req, dg.reply = req, reply
4096 		mset.gets.push(dg)
4097 	} else {
4098 		mset.getDirectRequest(&req, reply)
4099 	}
4100 }
4101 
4102 // For direct get batch and multi requests.
4103 const (
4104 	dg   = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\n\r\n"
4105 	dgb  = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Num-Pending: %d\r\nNats-Last-Sequence: %d\r\n\r\n"
4106 	eob  = "NATS/1.0 204 EOB\r\nNats-Num-Pending: %d\r\nNats-Last-Sequence: %d\r\n\r\n"
4107 	eobm = "NATS/1.0 204 EOB\r\nNats-Num-Pending: %d\r\nNats-Last-Sequence: %d\r\nNats-UpTo-Sequence: %d\r\n\r\n"
4108 )
4109 
4110 // Handle a multi request.
4111 func (mset *stream) getDirectMulti(req *JSApiMsgGetRequest, reply string) {
4112 	// TODO(dlc) - Make configurable?
4113 	const maxAllowedResponses = 1024
4114 
4115 	// We hold the lock here to try to avoid changes out from underneath of us.
4116 	mset.mu.RLock()
4117 	defer mset.mu.RUnlock()
4118 	// Grab store and name.
4119 	store, name, s := mset.store, mset.cfg.Name, mset.srv
4120 
4121 	// Grab MaxBytes
4122 	mb := req.MaxBytes
4123 	if mb == 0 && s != nil {
4124 		// Fill in with the server's MaxPending.
4125 		mb = int(s.opts.MaxPending)
4126 	}
4127 
4128 	upToSeq := req.UpToSeq
4129 	// If we have UpToTime set get the proper sequence.
4130 	if req.UpToTime != nil {
4131 		upToSeq = store.GetSeqFromTime((*req.UpToTime).UTC())
4132 		// We need to back off one, since GetSeqFromTime is normally used to determine
4133 		// a start sequence, whereas here we want it to be the ceiling.
4134 		upToSeq--
4135 	}
4136 	// If not set, set to the last sequence and remember that for EOB.
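	// (Worked example of the UpToTime conversion above: with messages stored
	// at t=10s (seq 5) and t=20s (seq 6), UpToTime=15s has GetSeqFromTime
	// return seq 6, the first message at or after that time, and backing off
	// one yields upToSeq == 5, the last sequence before the requested cutoff.)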
4137 	if upToSeq == 0 {
4138 		var state StreamState
4139 		mset.store.FastState(&state)
4140 		upToSeq = state.LastSeq
4141 	}
4142 
4143 	seqs, err := store.MultiLastSeqs(req.MultiLastFor, upToSeq, maxAllowedResponses)
4144 	if err != nil {
4145 		var hdr []byte
4146 		if err == ErrTooManyResults {
4147 			hdr = []byte("NATS/1.0 413 Too Many Results\r\n\r\n")
4148 		} else {
4149 			hdr = []byte(fmt.Sprintf("NATS/1.0 500 %v\r\n\r\n", err))
4150 		}
4151 		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4152 		return
4153 	}
4154 	if len(seqs) == 0 {
4155 		hdr := []byte("NATS/1.0 404 No Results\r\n\r\n")
4156 		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4157 		return
4158 	}
4159 
4160 	np, lseq, sentBytes, sent := uint64(len(seqs)-1), uint64(0), 0, 0
4161 	for _, seq := range seqs {
4162 		if seq < req.Seq {
4163 			if np > 0 {
4164 				np--
4165 			}
4166 			continue
4167 		}
4168 		var svp StoreMsg
4169 		sm, err := store.LoadMsg(seq, &svp)
4170 		if err != nil {
4171 			hdr := []byte("NATS/1.0 404 Message Not Found\r\n\r\n")
4172 			mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4173 			return
4174 		}
4175 
4176 		hdr := sm.hdr
4177 		ts := time.Unix(0, sm.ts).UTC()
4178 
4179 		if len(hdr) == 0 {
4180 			hdr = fmt.Appendf(nil, dgb, name, sm.subj, sm.seq, ts.Format(time.RFC3339Nano), np, lseq)
4181 		} else {
4182 			hdr = copyBytes(hdr)
4183 			hdr = genHeader(hdr, JSStream, name)
4184 			hdr = genHeader(hdr, JSSubject, sm.subj)
4185 			hdr = genHeader(hdr, JSSequence, strconv.FormatUint(sm.seq, 10))
4186 			hdr = genHeader(hdr, JSTimeStamp, ts.Format(time.RFC3339Nano))
4187 			hdr = genHeader(hdr, JSNumPending, strconv.FormatUint(np, 10))
4188 			hdr = genHeader(hdr, JSLastSequence, strconv.FormatUint(lseq, 10))
4189 		}
4190 		// Decrement num pending. This is an optimization so we do not have to continually look it up for these operations.
4191 		if np > 0 {
4192 			np--
4193 		}
4194 		// Track our lseq.
4195 		lseq = sm.seq
4196 		// Send out our message.
4197 		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, sm.msg, nil, 0))
4198 		// Check if we have exceeded max bytes.
4199 		sentBytes += len(sm.subj) + len(sm.hdr) + len(sm.msg)
4200 		if sentBytes >= mb {
4201 			break
4202 		}
4203 		sent++
4204 		if req.Batch > 0 && sent >= req.Batch {
4205 			break
4206 		}
4207 	}
4208 
4209 	// Send out EOB.
4210 	hdr := fmt.Appendf(nil, eobm, np, lseq, upToSeq)
4211 	mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4212 }
4213 
4214 // Do actual work on a direct msg request.
4215 // This could be called in a Go routine if we are inline for a non-client connection.
4216 func (mset *stream) getDirectRequest(req *JSApiMsgGetRequest, reply string) {
4217 	// Handle multi in separate function.
4218 	if len(req.MultiLastFor) > 0 {
4219 		mset.getDirectMulti(req, reply)
4220 		return
4221 	}
4222 
4223 	mset.mu.RLock()
4224 	store, name, s := mset.store, mset.cfg.Name, mset.srv
4225 	mset.mu.RUnlock()
4226 
4227 	seq := req.Seq
4228 	wc := subjectHasWildcard(req.NextFor)
4229 	// For tracking num pending if we are batch.
4230 	var np, lseq, validThrough uint64
4231 	var isBatchRequest bool
4232 	batch := req.Batch
4233 	if batch == 0 {
4234 		batch = 1
4235 	} else {
4236 		// This is a batch request, capture initial numPending.
4237 		isBatchRequest = true
4238 		np, validThrough = store.NumPending(seq, req.NextFor, false)
4239 	}
4240 
4241 	// Grab MaxBytes
4242 	mb := req.MaxBytes
4243 	if mb == 0 && s != nil {
4244 		// Fill in with the server's MaxPending.
4245 		mb = int(s.opts.MaxPending)
4246 	}
4247 	// Track what we have sent.
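	// For batched replies the protocol is: each message carries
	// Nats-Num-Pending and Nats-Last-Sequence headers (dgb above), and the
	// batch is terminated by a "NATS/1.0 204 EOB" control message (eob/eobm)
	// with the final pending count and last sequence. sentBytes below
	// accumulates subject+hdr+msg sizes for the MaxBytes cutoff.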
4248 	var sentBytes int
4249 
4250 	// Loop over batch, which defaults to 1.
4251 	for i := 0; i < batch; i++ {
4252 		var (
4253 			svp StoreMsg
4254 			sm  *StoreMsg
4255 			err error
4256 		)
4257 		if seq > 0 && req.NextFor == _EMPTY_ {
4258 			// Only do direct lookup for first in a batch.
4259 			if i == 0 {
4260 				sm, err = store.LoadMsg(seq, &svp)
4261 			} else {
4262 				// We want to use load next with fwcs to step over deleted msgs.
4263 				sm, seq, err = store.LoadNextMsg(fwcs, true, seq, &svp)
4264 			}
4265 			// Bump for next loop if applicable.
4266 			seq++
4267 		} else if req.NextFor != _EMPTY_ {
4268 			sm, seq, err = store.LoadNextMsg(req.NextFor, wc, seq, &svp)
4269 			seq++
4270 		} else {
4271 			// Batch is not applicable here, this is checked before we get here.
4272 			sm, err = store.LoadLastMsg(req.LastFor, &svp)
4273 		}
4274 		if err != nil {
4275 			// For batches, if we stop early we want to do EOB logic below.
4276 			if batch > 1 && i > 0 {
4277 				break
4278 			}
4279 			hdr := []byte("NATS/1.0 404 Message Not Found\r\n\r\n")
4280 			mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4281 			return
4282 		}
4283 
4284 		hdr := sm.hdr
4285 		ts := time.Unix(0, sm.ts).UTC()
4286 
4287 		if isBatchRequest {
4288 			if len(hdr) == 0 {
4289 				hdr = fmt.Appendf(nil, dgb, name, sm.subj, sm.seq, ts.Format(time.RFC3339Nano), np, lseq)
4290 			} else {
4291 				hdr = copyBytes(hdr)
4292 				hdr = genHeader(hdr, JSStream, name)
4293 				hdr = genHeader(hdr, JSSubject, sm.subj)
4294 				hdr = genHeader(hdr, JSSequence, strconv.FormatUint(sm.seq, 10))
4295 				hdr = genHeader(hdr, JSTimeStamp, ts.Format(time.RFC3339Nano))
4296 				hdr = genHeader(hdr, JSNumPending, strconv.FormatUint(np, 10))
4297 				hdr = genHeader(hdr, JSLastSequence, strconv.FormatUint(lseq, 10))
4298 			}
4299 			// Decrement num pending. This is an optimization so we do not have to continually look it up for these operations.
4300 			np--
4301 		} else {
4302 			if len(hdr) == 0 {
4303 				hdr = fmt.Appendf(nil, dg, name, sm.subj, sm.seq, ts.Format(time.RFC3339Nano))
4304 			} else {
4305 				hdr = copyBytes(hdr)
4306 				hdr = genHeader(hdr, JSStream, name)
4307 				hdr = genHeader(hdr, JSSubject, sm.subj)
4308 				hdr = genHeader(hdr, JSSequence, strconv.FormatUint(sm.seq, 10))
4309 				hdr = genHeader(hdr, JSTimeStamp, ts.Format(time.RFC3339Nano))
4310 			}
4311 		}
4312 		// Track our lseq.
4313 		lseq = sm.seq
4314 		// Send out our message.
4315 		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, sm.msg, nil, 0))
4316 		// Check if we have exceeded max bytes.
4317 		sentBytes += len(sm.subj) + len(sm.hdr) + len(sm.msg)
4318 		if sentBytes >= mb {
4319 			break
4320 		}
4321 	}
4322 
4323 	// If batch was requested send EOB.
4324 	if isBatchRequest {
4325 		// Update np if the stream's last sequence has moved past our validThrough.
4326 		if mset.lastSeq() > validThrough {
4327 			np, _ = store.NumPending(seq, req.NextFor, false)
4328 		}
4329 		hdr := fmt.Appendf(nil, eob, np, lseq)
4330 		mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, hdr, nil, nil, 0))
4331 	}
4332 }
4333 
4334 // processInboundJetStreamMsg handles processing messages bound for a stream.
4335 func (mset *stream) processInboundJetStreamMsg(_ *subscription, c *client, _ *Account, subject, reply string, rmsg []byte) {
4336 	hdr, msg := c.msgParts(rmsg)
4337 	// Copy these.
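	// hdr and msg alias the connection read buffer, which will be reused once
	// this callback returns; since the message is processed later on another
	// goroutine via queueInbound below, both must be copied up front.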
4338 	if len(hdr) > 0 {
4339 		hdr = copyBytes(hdr)
4340 	}
4341 	if len(msg) > 0 {
4342 		msg = copyBytes(msg)
4343 	}
4344 	if mt, traceOnly := c.isMsgTraceEnabled(); mt != nil {
4345 		// If the message is delivered, we need to disable the message trace headers
4346 		// to prevent a trace event from being generated when a stored message
4347 		// is delivered to a consumer and routed.
4348 		if !traceOnly {
4349 			disableTraceHeaders(c, hdr)
4350 		}
4351 		// This will add the jetstream event while in the client read loop.
4352 		// Since the event will be updated in a different go routine, the
4353 		// tracing object will have a separate reference to the JS trace
4354 		// object.
4355 		mt.addJetStreamEvent(mset.name())
4356 	}
4357 	mset.queueInbound(mset.msgs, subject, reply, hdr, msg, c.pa.trace)
4358 }
4359 
4360 var (
4361 	errLastSeqMismatch   = errors.New("last sequence mismatch")
4362 	errMsgIdDuplicate    = errors.New("msgid is duplicate")
4363 	errStreamClosed      = errors.New("stream closed")
4364 	errInvalidMsgHandler = errors.New("undefined message handler")
4365 )
4366 
4367 // processJetStreamMsg is where we try to actually process the stream msg.
4368 func (mset *stream) processJetStreamMsg(subject, reply string, hdr, msg []byte, lseq uint64, ts int64, mt *msgTrace) (retErr error) {
4369 	if mt != nil {
4370 		// Only the leader/standalone will have mt!=nil. On exit, send the
4371 		// message trace event.
4372 		defer func() {
4373 			mt.sendEventFromJetStream(retErr)
4374 		}()
4375 	}
4376 
4377 	if mset.closed.Load() {
4378 		return errStreamClosed
4379 	}
4380 
4381 	mset.mu.Lock()
4382 	s, store := mset.srv, mset.store
4383 
4384 	traceOnly := mt.traceOnly()
4385 	bumpCLFS := func() {
4386 		// Do not bump if tracing and not doing message delivery.
4387 		if traceOnly {
4388 			return
4389 		}
4390 		mset.clfs++
4391 	}
4392 
4393 	// Apply the input subject transform if any.
4394 	if mset.itr != nil {
4395 		ts, err := mset.itr.Match(subject)
4396 		if err == nil {
4397 			// No filtering here: the subject is only rewritten when it matches the transform source, otherwise it is left unchanged.
4398 			subject = ts
4399 		}
4400 	}
4401 
4402 	var accName string
4403 	if mset.acc != nil {
4404 		accName = mset.acc.Name
4405 	}
4406 
4407 	js, jsa, doAck := mset.js, mset.jsa, !mset.cfg.NoAck
4408 	name, stype := mset.cfg.Name, mset.cfg.Storage
4409 	maxMsgSize := int(mset.cfg.MaxMsgSize)
4410 	numConsumers := len(mset.consumers)
4411 	interestRetention := mset.cfg.Retention == InterestPolicy
4412 	// Snapshot if we are the leader and if we can respond.
4413 	isLeader, isSealed := mset.isLeader(), mset.cfg.Sealed
4414 	canRespond := doAck && len(reply) > 0 && isLeader
4415 
4416 	var resp = &JSPubAckResponse{}
4417 
4418 	// Bail here if sealed.
4419 	if isSealed {
4420 		outq := mset.outq
4421 		bumpCLFS()
4422 		mset.mu.Unlock()
4423 		if canRespond && outq != nil {
4424 			resp.PubAck = &PubAck{Stream: name}
4425 			resp.Error = ApiErrors[JSStreamSealedErr]
4426 			b, _ := json.Marshal(resp)
4427 			outq.sendMsg(reply, b)
4428 		}
4429 		return ApiErrors[JSStreamSealedErr]
4430 	}
4431 
4432 	var buf [256]byte
4433 	pubAck := append(buf[:0], mset.pubAck...)
4434 
4435 	// If this is a non-clustered msg and we are not considered active, meaning no active subscription, do not process.
4436 	if lseq == 0 && ts == 0 && !mset.active {
4437 		mset.mu.Unlock()
4438 		return nil
4439 	}
4440 
4441 	// For clustering the lower layers will pass our expected lseq. If it is present check for that here.
4442 	if lseq > 0 && lseq != (mset.lseq+mset.clfs) {
4443 		isMisMatch := true
4444 		// We may be able to recover here if we have no state whatsoever, or we are a mirror.
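		// A worked example of the accounting above: if the stream has stored
		// up to sequence 8 (mset.lseq == 8) and two clustered proposals failed
		// to store (mset.clfs == 2), the next proposal arrives with lseq == 10
		// and the sequence actually written will be lseq+1-clfs == 9.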
4445 // See if we have to adjust our starting sequence. 4446 if mset.lseq == 0 || mset.cfg.Mirror != nil { 4447 var state StreamState 4448 mset.store.FastState(&state) 4449 if state.FirstSeq == 0 { 4450 mset.store.Compact(lseq + 1) 4451 mset.lseq = lseq 4452 isMisMatch = false 4453 } 4454 } 4455 // Really is a mismatch. 4456 if isMisMatch { 4457 outq := mset.outq 4458 mset.mu.Unlock() 4459 if canRespond && outq != nil { 4460 resp.PubAck = &PubAck{Stream: name} 4461 resp.Error = ApiErrors[JSStreamSequenceNotMatchErr] 4462 b, _ := json.Marshal(resp) 4463 outq.sendMsg(reply, b) 4464 } 4465 return errLastSeqMismatch 4466 } 4467 } 4468 4469 // If we have received this message across an account we may have request information attached. 4470 // For now remove. TODO(dlc) - Should this be opt-in or opt-out? 4471 if len(hdr) > 0 { 4472 hdr = removeHeaderIfPresent(hdr, ClientInfoHdr) 4473 } 4474 4475 // Process additional msg headers if still present. 4476 var msgId string 4477 var rollupSub, rollupAll bool 4478 isClustered := mset.isClustered() 4479 4480 if len(hdr) > 0 { 4481 outq := mset.outq 4482 4483 // Certain checks have already been performed if in clustered mode, so only check if not. 4484 // Note, for cluster mode but with message tracing (without message delivery), we need 4485 // to do this check here since it was not done in processClusteredInboundMsg(). 4486 if !isClustered || traceOnly { 4487 // Expected stream. 4488 if sname := getExpectedStream(hdr); sname != _EMPTY_ && sname != name { 4489 bumpCLFS() 4490 mset.mu.Unlock() 4491 if canRespond { 4492 resp.PubAck = &PubAck{Stream: name} 4493 resp.Error = NewJSStreamNotMatchError() 4494 b, _ := json.Marshal(resp) 4495 outq.sendMsg(reply, b) 4496 } 4497 return errors.New("expected stream does not match") 4498 } 4499 } 4500 4501 // Dedupe detection. 4502 if msgId = getMsgId(hdr); msgId != _EMPTY_ { 4503 if dde := mset.checkMsgId(msgId); dde != nil { 4504 bumpCLFS() 4505 mset.mu.Unlock() 4506 if canRespond { 4507 response := append(pubAck, strconv.FormatUint(dde.seq, 10)...) 4508 response = append(response, ",\"duplicate\": true}"...) 4509 outq.sendMsg(reply, response) 4510 } 4511 return errMsgIdDuplicate 4512 } 4513 } 4514 // Expected last sequence per subject. 4515 // If we are clustered we have prechecked seq > 0. 4516 if seq, exists := getExpectedLastSeqPerSubject(hdr); exists { 4517 // TODO(dlc) - We could make a new store func that does this all in one. 4518 var smv StoreMsg 4519 var fseq uint64 4520 sm, err := store.LoadLastMsg(subject, &smv) 4521 if sm != nil { 4522 fseq = sm.seq 4523 } 4524 if err == ErrStoreMsgNotFound && seq == 0 { 4525 fseq, err = 0, nil 4526 } 4527 if err != nil || fseq != seq { 4528 bumpCLFS() 4529 mset.mu.Unlock() 4530 if canRespond { 4531 resp.PubAck = &PubAck{Stream: name} 4532 resp.Error = NewJSStreamWrongLastSequenceError(fseq) 4533 b, _ := json.Marshal(resp) 4534 outq.sendMsg(reply, b) 4535 } 4536 return fmt.Errorf("last sequence by subject mismatch: %d vs %d", seq, fseq) 4537 } 4538 } 4539 4540 // Expected last sequence. 4541 if seq, exists := getExpectedLastSeq(hdr); exists && seq != mset.lseq { 4542 mlseq := mset.lseq 4543 bumpCLFS() 4544 mset.mu.Unlock() 4545 if canRespond { 4546 resp.PubAck = &PubAck{Stream: name} 4547 resp.Error = NewJSStreamWrongLastSequenceError(mlseq) 4548 b, _ := json.Marshal(resp) 4549 outq.sendMsg(reply, b) 4550 } 4551 return fmt.Errorf("last sequence mismatch: %d vs %d", seq, mlseq) 4552 } 4553 // Expected last msgId. 
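		// Publisher side this enables optimistic concurrency, e.g.
		// (hypothetical client sketch):
		//
		//	m := nats.NewMsg("orders.new")
		//	m.Header.Set("Nats-Expected-Last-Msg-Id", lastId) // JSExpectedLastMsgId
		//	_, err := js.PublishMsg(m) // rejected below on a mismatch
		//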
4554 if lmsgId := getExpectedLastMsgId(hdr); lmsgId != _EMPTY_ { 4555 if mset.lmsgId == _EMPTY_ && !mset.ddloaded { 4556 mset.rebuildDedupe() 4557 } 4558 if lmsgId != mset.lmsgId { 4559 last := mset.lmsgId 4560 bumpCLFS() 4561 mset.mu.Unlock() 4562 if canRespond { 4563 resp.PubAck = &PubAck{Stream: name} 4564 resp.Error = NewJSStreamWrongLastMsgIDError(last) 4565 b, _ := json.Marshal(resp) 4566 outq.sendMsg(reply, b) 4567 } 4568 return fmt.Errorf("last msgid mismatch: %q vs %q", lmsgId, last) 4569 } 4570 } 4571 // Check for any rollups. 4572 if rollup := getRollup(hdr); rollup != _EMPTY_ { 4573 if !mset.cfg.AllowRollup || mset.cfg.DenyPurge { 4574 bumpCLFS() 4575 mset.mu.Unlock() 4576 if canRespond { 4577 resp.PubAck = &PubAck{Stream: name} 4578 resp.Error = NewJSStreamRollupFailedError(errors.New("rollup not permitted")) 4579 b, _ := json.Marshal(resp) 4580 outq.sendMsg(reply, b) 4581 } 4582 return errors.New("rollup not permitted") 4583 } 4584 switch rollup { 4585 case JSMsgRollupSubject: 4586 rollupSub = true 4587 case JSMsgRollupAll: 4588 rollupAll = true 4589 default: 4590 bumpCLFS() 4591 mset.mu.Unlock() 4592 err := fmt.Errorf("rollup value invalid: %q", rollup) 4593 if canRespond { 4594 resp.PubAck = &PubAck{Stream: name} 4595 resp.Error = NewJSStreamRollupFailedError(err) 4596 b, _ := json.Marshal(resp) 4597 outq.sendMsg(reply, b) 4598 } 4599 return err 4600 } 4601 } 4602 } 4603 4604 // Response Ack. 4605 var ( 4606 response []byte 4607 seq uint64 4608 err error 4609 ) 4610 4611 // Check to see if we are over the max msg size. 4612 if maxMsgSize >= 0 && (len(hdr)+len(msg)) > maxMsgSize { 4613 bumpCLFS() 4614 mset.mu.Unlock() 4615 if canRespond { 4616 resp.PubAck = &PubAck{Stream: name} 4617 resp.Error = NewJSStreamMessageExceedsMaximumError() 4618 response, _ = json.Marshal(resp) 4619 mset.outq.sendMsg(reply, response) 4620 } 4621 return ErrMaxPayload 4622 } 4623 4624 if len(hdr) > math.MaxUint16 { 4625 bumpCLFS() 4626 mset.mu.Unlock() 4627 if canRespond { 4628 resp.PubAck = &PubAck{Stream: name} 4629 resp.Error = NewJSStreamHeaderExceedsMaximumError() 4630 response, _ = json.Marshal(resp) 4631 mset.outq.sendMsg(reply, response) 4632 } 4633 return ErrMaxPayload 4634 } 4635 4636 // Check to see if we have exceeded our limits. 4637 if js.limitsExceeded(stype) { 4638 s.resourcesExceededError() 4639 bumpCLFS() 4640 mset.mu.Unlock() 4641 if canRespond { 4642 resp.PubAck = &PubAck{Stream: name} 4643 resp.Error = NewJSInsufficientResourcesError() 4644 response, _ = json.Marshal(resp) 4645 mset.outq.sendMsg(reply, response) 4646 } 4647 // Stepdown regardless. 4648 if node := mset.raftNode(); node != nil { 4649 node.StepDown() 4650 } 4651 return NewJSInsufficientResourcesError() 4652 } 4653 4654 var noInterest bool 4655 4656 // If we are interest based retention and have no consumers then we can skip. 4657 if interestRetention { 4658 if numConsumers == 0 { 4659 noInterest = true 4660 } else if mset.numFilter > 0 { 4661 // Assume no interest and check to disqualify. 4662 noInterest = true 4663 mset.clsMu.RLock() 4664 for _, o := range mset.cList { 4665 if o.cfg.FilterSubject == _EMPTY_ || subjectIsSubsetMatch(subject, o.cfg.FilterSubject) { 4666 noInterest = false 4667 break 4668 } 4669 } 4670 mset.clsMu.RUnlock() 4671 } 4672 } 4673 4674 // Grab timestamp if not already set. 4675 if ts == 0 && lseq > 0 { 4676 ts = time.Now().UnixNano() 4677 } 4678 4679 mt.updateJetStreamEvent(subject, noInterest) 4680 if traceOnly { 4681 mset.mu.Unlock() 4682 return nil 4683 } 4684 4685 // Skip msg here. 
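	// Skipping still consumes a sequence via store.SkipMsg() below, so
	// mset.lseq advances and the pub ack reports that sequence even though
	// nothing was stored.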
4686 if noInterest { 4687 mset.lseq = store.SkipMsg() 4688 mset.lmsgId = msgId 4689 // If we have a msgId make sure to save. 4690 if msgId != _EMPTY_ { 4691 mset.storeMsgIdLocked(&ddentry{msgId, seq, ts}) 4692 } 4693 if canRespond { 4694 response = append(pubAck, strconv.FormatUint(mset.lseq, 10)...) 4695 response = append(response, '}') 4696 mset.outq.sendMsg(reply, response) 4697 } 4698 mset.mu.Unlock() 4699 return nil 4700 } 4701 4702 // If here we will attempt to store the message. 4703 // Assume this will succeed. 4704 olmsgId := mset.lmsgId 4705 mset.lmsgId = msgId 4706 clfs := mset.clfs 4707 mset.lseq++ 4708 tierName := mset.tier 4709 4710 // Republish state if needed. 4711 var tsubj string 4712 var tlseq uint64 4713 var thdrsOnly bool 4714 if mset.tr != nil { 4715 tsubj, _ = mset.tr.Match(subject) 4716 if mset.cfg.RePublish != nil { 4717 thdrsOnly = mset.cfg.RePublish.HeadersOnly 4718 } 4719 } 4720 republish := tsubj != _EMPTY_ && isLeader 4721 4722 // If we are republishing grab last sequence for this exact subject. Aids in gap detection for lightweight clients. 4723 if republish { 4724 var smv StoreMsg 4725 if sm, _ := store.LoadLastMsg(subject, &smv); sm != nil { 4726 tlseq = sm.seq 4727 } 4728 } 4729 4730 // If clustered this was already checked and we do not want to check here and possibly introduce skew. 4731 if !isClustered { 4732 if exceeded, err := jsa.wouldExceedLimits(stype, tierName, mset.cfg.Replicas, subject, hdr, msg); exceeded { 4733 if err == nil { 4734 err = NewJSAccountResourcesExceededError() 4735 } 4736 s.RateLimitWarnf("JetStream resource limits exceeded for account: %q", accName) 4737 if canRespond { 4738 resp.PubAck = &PubAck{Stream: name} 4739 resp.Error = err 4740 response, _ = json.Marshal(resp) 4741 mset.outq.send(newJSPubMsg(reply, _EMPTY_, _EMPTY_, nil, response, nil, 0)) 4742 } 4743 } 4744 } 4745 4746 // Store actual msg. 4747 if lseq == 0 && ts == 0 { 4748 seq, ts, err = store.StoreMsg(subject, hdr, msg) 4749 } else { 4750 // Make sure to take into account any message assignments that we had to skip (clfs). 4751 seq = lseq + 1 - clfs 4752 // Check for preAcks and the need to skip vs store. 4753 if mset.hasAllPreAcks(seq, subject) { 4754 mset.clearAllPreAcks(seq) 4755 store.SkipMsg() 4756 } else { 4757 err = store.StoreRawMsg(subject, hdr, msg, seq, ts) 4758 } 4759 } 4760 4761 if err != nil { 4762 // If we did not succeed put those values back and increment clfs in case we are clustered. 4763 var state StreamState 4764 mset.store.FastState(&state) 4765 mset.lseq = state.LastSeq 4766 mset.lmsgId = olmsgId 4767 bumpCLFS() 4768 mset.mu.Unlock() 4769 4770 switch err { 4771 case ErrMaxMsgs, ErrMaxBytes, ErrMaxMsgsPerSubject, ErrMsgTooLarge: 4772 s.RateLimitDebugf("JetStream failed to store a msg on stream '%s > %s': %v", accName, name, err) 4773 case ErrStoreClosed: 4774 default: 4775 s.Errorf("JetStream failed to store a msg on stream '%s > %s': %v", accName, name, err) 4776 } 4777 4778 if canRespond { 4779 resp.PubAck = &PubAck{Stream: name} 4780 resp.Error = NewJSStreamStoreFailedError(err, Unless(err)) 4781 response, _ = json.Marshal(resp) 4782 mset.outq.sendMsg(reply, response) 4783 } 4784 return err 4785 } 4786 4787 // If we have a msgId make sure to save. 4788 if msgId != _EMPTY_ { 4789 mset.storeMsgIdLocked(&ddentry{msgId, seq, ts}) 4790 } 4791 4792 // If here we succeeded in storing the message. 4793 mset.mu.Unlock() 4794 4795 // No errors, this is the normal path. 
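	// Rollups are applied after the message itself has been stored: purging
	// with Keep: 1 below retains exactly the rollup message just written,
	// either for its subject (rollupSub) or for the whole stream (rollupAll).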
4796 if rollupSub { 4797 mset.purge(&JSApiStreamPurgeRequest{Subject: subject, Keep: 1}) 4798 } else if rollupAll { 4799 mset.purge(&JSApiStreamPurgeRequest{Keep: 1}) 4800 } 4801 4802 // Check for republish. 4803 if republish { 4804 tsStr := time.Unix(0, ts).UTC().Format(time.RFC3339Nano) 4805 var rpMsg []byte 4806 if len(hdr) == 0 { 4807 const ht = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\n\r\n" 4808 const htho = "NATS/1.0\r\nNats-Stream: %s\r\nNats-Subject: %s\r\nNats-Sequence: %d\r\nNats-Time-Stamp: %s\r\nNats-Last-Sequence: %d\r\nNats-Msg-Size: %d\r\n\r\n" 4809 if !thdrsOnly { 4810 hdr = fmt.Appendf(nil, ht, name, subject, seq, tsStr, tlseq) 4811 rpMsg = copyBytes(msg) 4812 } else { 4813 hdr = fmt.Appendf(nil, htho, name, subject, seq, tsStr, tlseq, len(msg)) 4814 } 4815 } else { 4816 // Slow path. 4817 hdr = genHeader(hdr, JSStream, name) 4818 hdr = genHeader(hdr, JSSubject, subject) 4819 hdr = genHeader(hdr, JSSequence, strconv.FormatUint(seq, 10)) 4820 hdr = genHeader(hdr, JSTimeStamp, tsStr) 4821 hdr = genHeader(hdr, JSLastSequence, strconv.FormatUint(tlseq, 10)) 4822 if !thdrsOnly { 4823 rpMsg = copyBytes(msg) 4824 } else { 4825 hdr = genHeader(hdr, JSMsgSize, strconv.Itoa(len(msg))) 4826 } 4827 } 4828 mset.outq.send(newJSPubMsg(tsubj, _EMPTY_, _EMPTY_, copyBytes(hdr), rpMsg, nil, seq)) 4829 } 4830 4831 // Send response here. 4832 if canRespond { 4833 response = append(pubAck, strconv.FormatUint(seq, 10)...) 4834 response = append(response, '}') 4835 mset.outq.sendMsg(reply, response) 4836 } 4837 4838 // Signal consumers for new messages. 4839 if numConsumers > 0 { 4840 mset.sigq.push(newCMsg(subject, seq)) 4841 select { 4842 case mset.sch <- struct{}{}: 4843 default: 4844 } 4845 } 4846 4847 return nil 4848 } 4849 4850 // Used to signal inbound message to registered consumers. 4851 type cMsg struct { 4852 seq uint64 4853 subj string 4854 } 4855 4856 // Pool to recycle consumer bound msgs. 4857 var cMsgPool sync.Pool 4858 4859 // Used to queue up consumer bound msgs for signaling. 4860 func newCMsg(subj string, seq uint64) *cMsg { 4861 var m *cMsg 4862 cm := cMsgPool.Get() 4863 if cm != nil { 4864 m = cm.(*cMsg) 4865 } else { 4866 m = new(cMsg) 4867 } 4868 m.subj, m.seq = subj, seq 4869 4870 return m 4871 } 4872 4873 func (m *cMsg) returnToPool() { 4874 if m == nil { 4875 return 4876 } 4877 m.subj, m.seq = _EMPTY_, 0 4878 cMsgPool.Put(m) 4879 } 4880 4881 // Go routine to signal consumers. 4882 // Offloaded from stream msg processing. 4883 func (mset *stream) signalConsumersLoop() { 4884 mset.mu.RLock() 4885 s, qch, sch, msgs := mset.srv, mset.qch, mset.sch, mset.sigq 4886 mset.mu.RUnlock() 4887 4888 for { 4889 select { 4890 case <-s.quitCh: 4891 return 4892 case <-qch: 4893 return 4894 case <-sch: 4895 cms := msgs.pop() 4896 for _, m := range cms { 4897 seq, subj := m.seq, m.subj 4898 m.returnToPool() 4899 // Signal all appropriate consumers. 4900 mset.signalConsumers(subj, seq) 4901 } 4902 msgs.recycle(&cms) 4903 } 4904 } 4905 } 4906 4907 // This will update and signal all consumers that match. 4908 func (mset *stream) signalConsumers(subj string, seq uint64) { 4909 mset.clsMu.RLock() 4910 if mset.csl == nil { 4911 mset.clsMu.RUnlock() 4912 return 4913 } 4914 r := mset.csl.Match(subj) 4915 mset.clsMu.RUnlock() 4916 4917 if len(r.psubs) == 0 { 4918 return 4919 } 4920 // Encode the sequence here. 
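	// The signal payload is just the sequence, encoded below as 8
	// little-endian bytes; the receiving side recovers it with
	//
	//	seq := binary.LittleEndian.Uint64(msg)
	//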
4921 var eseq [8]byte 4922 var le = binary.LittleEndian 4923 le.PutUint64(eseq[:], seq) 4924 msg := eseq[:] 4925 for _, sub := range r.psubs { 4926 sub.icb(sub, nil, nil, subj, _EMPTY_, msg) 4927 } 4928 } 4929 4930 // Internal message for use by jetstream subsystem. 4931 type jsPubMsg struct { 4932 dsubj string // Subject to send to, e.g. _INBOX.xxx 4933 reply string 4934 StoreMsg 4935 o *consumer 4936 } 4937 4938 var jsPubMsgPool sync.Pool 4939 4940 func newJSPubMsg(dsubj, subj, reply string, hdr, msg []byte, o *consumer, seq uint64) *jsPubMsg { 4941 var m *jsPubMsg 4942 var buf []byte 4943 pm := jsPubMsgPool.Get() 4944 if pm != nil { 4945 m = pm.(*jsPubMsg) 4946 buf = m.buf[:0] 4947 } else { 4948 m = new(jsPubMsg) 4949 } 4950 // When getting something from a pool it is critical that all fields are 4951 // initialized. Doing this way guarantees that if someone adds a field to 4952 // the structure, the compiler will fail the build if this line is not updated. 4953 (*m) = jsPubMsg{dsubj, reply, StoreMsg{subj, hdr, msg, buf, seq, 0}, o} 4954 4955 return m 4956 } 4957 4958 // Gets a jsPubMsg from the pool. 4959 func getJSPubMsgFromPool() *jsPubMsg { 4960 pm := jsPubMsgPool.Get() 4961 if pm != nil { 4962 return pm.(*jsPubMsg) 4963 } 4964 return new(jsPubMsg) 4965 } 4966 4967 func (pm *jsPubMsg) returnToPool() { 4968 if pm == nil { 4969 return 4970 } 4971 pm.subj, pm.dsubj, pm.reply, pm.hdr, pm.msg, pm.o = _EMPTY_, _EMPTY_, _EMPTY_, nil, nil, nil 4972 if len(pm.buf) > 0 { 4973 pm.buf = pm.buf[:0] 4974 } 4975 jsPubMsgPool.Put(pm) 4976 } 4977 4978 func (pm *jsPubMsg) size() int { 4979 if pm == nil { 4980 return 0 4981 } 4982 return len(pm.dsubj) + len(pm.reply) + len(pm.hdr) + len(pm.msg) 4983 } 4984 4985 // Queue of *jsPubMsg for sending internal system messages. 4986 type jsOutQ struct { 4987 *ipQueue[*jsPubMsg] 4988 } 4989 4990 func (q *jsOutQ) sendMsg(subj string, msg []byte) { 4991 if q != nil { 4992 q.send(newJSPubMsg(subj, _EMPTY_, _EMPTY_, nil, msg, nil, 0)) 4993 } 4994 } 4995 4996 func (q *jsOutQ) send(msg *jsPubMsg) { 4997 if q == nil || msg == nil { 4998 return 4999 } 5000 q.push(msg) 5001 } 5002 5003 func (q *jsOutQ) unregister() { 5004 if q == nil { 5005 return 5006 } 5007 q.ipQueue.unregister() 5008 } 5009 5010 // StoredMsg is for raw access to messages in a stream. 5011 type StoredMsg struct { 5012 Subject string `json:"subject"` 5013 Sequence uint64 `json:"seq"` 5014 Header []byte `json:"hdrs,omitempty"` 5015 Data []byte `json:"data,omitempty"` 5016 Time time.Time `json:"time"` 5017 } 5018 5019 // This is similar to system semantics but did not want to overload the single system sendq, 5020 // or require system account when doing simple setup with jetstream. 5021 func (mset *stream) setupSendCapabilities() { 5022 mset.mu.Lock() 5023 defer mset.mu.Unlock() 5024 if mset.outq != nil { 5025 return 5026 } 5027 qname := fmt.Sprintf("[ACC:%s] stream '%s' sendQ", mset.acc.Name, mset.cfg.Name) 5028 mset.outq = &jsOutQ{newIPQueue[*jsPubMsg](mset.srv, qname)} 5029 go mset.internalLoop() 5030 } 5031 5032 // Returns the associated account name. 5033 func (mset *stream) accName() string { 5034 if mset == nil { 5035 return _EMPTY_ 5036 } 5037 mset.mu.RLock() 5038 acc := mset.acc 5039 mset.mu.RUnlock() 5040 return acc.Name 5041 } 5042 5043 // Name returns the stream name. 
5044 func (mset *stream) name() string {
5045 	if mset == nil {
5046 		return _EMPTY_
5047 	}
5048 	mset.mu.RLock()
5049 	defer mset.mu.RUnlock()
5050 	return mset.cfg.Name
5051 }
5052 
5053 func (mset *stream) internalLoop() {
5054 	mset.mu.RLock()
5055 	s := mset.srv
5056 	c := s.createInternalJetStreamClient()
5057 	c.registerWithAccount(mset.acc)
5058 	defer c.closeConnection(ClientClosed)
5059 	outq, qch, msgs, gets := mset.outq, mset.qch, mset.msgs, mset.gets
5060 
5061 	// For the ack msgs queue for interest retention.
5062 	var (
5063 		amch chan struct{}
5064 		ackq *ipQueue[uint64]
5065 	)
5066 	if mset.ackq != nil {
5067 		ackq, amch = mset.ackq, mset.ackq.ch
5068 	}
5069 	mset.mu.RUnlock()
5070 
5071 	// Raw scratch buffer.
5072 	// This should be rarely used now so can be smaller.
5073 	var _r [1024]byte
5074 
5075 	// To optimize for not converting a string to a []byte slice.
5076 	var (
5077 		subj  [256]byte
5078 		dsubj [256]byte
5079 		rply  [256]byte
5080 		szb   [10]byte
5081 		hdb   [10]byte
5082 	)
5083 
5084 	for {
5085 		select {
5086 		case <-outq.ch:
5087 			pms := outq.pop()
5088 			for _, pm := range pms {
5089 				c.pa.subject = append(dsubj[:0], pm.dsubj...)
5090 				c.pa.deliver = append(subj[:0], pm.subj...)
5091 				c.pa.size = len(pm.msg) + len(pm.hdr)
5092 				c.pa.szb = append(szb[:0], strconv.Itoa(c.pa.size)...)
5093 				if len(pm.reply) > 0 {
5094 					c.pa.reply = append(rply[:0], pm.reply...)
5095 				} else {
5096 					c.pa.reply = nil
5097 				}
5098 
5099 				// If we have an underlying buf it is the wire contents for hdr + msg, else construct on the fly.
5100 				var msg []byte
5101 				if len(pm.buf) > 0 {
5102 					msg = pm.buf
5103 				} else {
5104 					if len(pm.hdr) > 0 {
5105 						msg = pm.hdr
5106 						if len(pm.msg) > 0 {
5107 							msg = _r[:0]
5108 							msg = append(msg, pm.hdr...)
5109 							msg = append(msg, pm.msg...)
5110 						}
5111 					} else if len(pm.msg) > 0 {
5112 						// We own this now from a low level buffer perspective so can use directly here.
5113 						msg = pm.msg
5114 					}
5115 				}
5116 
5117 				if len(pm.hdr) > 0 {
5118 					c.pa.hdr = len(pm.hdr)
5119 					// Reuse the scratch buffer here to avoid an allocation.
5120 					c.pa.hdb = append(hdb[:0], strconv.Itoa(c.pa.hdr)...)
5121 				} else {
5122 					c.pa.hdr = -1
5123 					c.pa.hdb = nil
5124 				}
5125 
5126 				msg = append(msg, _CRLF_...)
5127 
5128 				didDeliver, _ := c.processInboundClientMsg(msg)
5129 				c.pa.szb, c.pa.subject, c.pa.deliver = nil, nil, nil
5130 
5131 				// Check to see if this is a delivery for a consumer and
5132 				// we failed to deliver the message. If so alert the consumer.
5133 				if pm.o != nil && pm.seq > 0 && !didDeliver {
5134 					pm.o.didNotDeliver(pm.seq, pm.dsubj)
5135 				}
5136 				pm.returnToPool()
5137 			}
5138 			// TODO: Move in the for-loop?
5139 			c.flushClients(0)
5140 			outq.recycle(&pms)
5141 		case <-msgs.ch:
5142 			// This can possibly change now so needs to be checked here.
5143 			isClustered := mset.IsClustered()
5144 			ims := msgs.pop()
5145 			for _, im := range ims {
5146 				// If we are clustered we need to propose this message to the underlying raft group.
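				// Note the 0, 0 passed below for lseq and ts in the
				// non-clustered case: processJetStreamMsg treats that
				// combination as a direct, non-proposed message and assigns
				// the sequence and timestamp itself via the store.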
5147 if isClustered { 5148 mset.processClusteredInboundMsg(im.subj, im.rply, im.hdr, im.msg, im.mt) 5149 } else { 5150 mset.processJetStreamMsg(im.subj, im.rply, im.hdr, im.msg, 0, 0, im.mt) 5151 } 5152 } 5153 msgs.recycle(&ims) 5154 case <-gets.ch: 5155 dgs := gets.pop() 5156 for _, dg := range dgs { 5157 mset.getDirectRequest(&dg.req, dg.reply) 5158 dgPool.Put(dg) 5159 } 5160 gets.recycle(&dgs) 5161 5162 case <-amch: 5163 seqs := ackq.pop() 5164 for _, seq := range seqs { 5165 mset.ackMsg(nil, seq) 5166 } 5167 ackq.recycle(&seqs) 5168 case <-qch: 5169 return 5170 case <-s.quitCh: 5171 return 5172 } 5173 } 5174 } 5175 5176 // Used to break consumers out of their monitorConsumer go routines. 5177 func (mset *stream) resetAndWaitOnConsumers() { 5178 mset.mu.RLock() 5179 consumers := make([]*consumer, 0, len(mset.consumers)) 5180 for _, o := range mset.consumers { 5181 consumers = append(consumers, o) 5182 } 5183 mset.mu.RUnlock() 5184 5185 for _, o := range consumers { 5186 if node := o.raftNode(); node != nil { 5187 if o.IsLeader() { 5188 node.StepDown() 5189 } 5190 node.Delete() 5191 } 5192 if o.isMonitorRunning() { 5193 o.monitorWg.Wait() 5194 } 5195 } 5196 } 5197 5198 // Internal function to delete a stream. 5199 func (mset *stream) delete() error { 5200 if mset == nil { 5201 return nil 5202 } 5203 return mset.stop(true, true) 5204 } 5205 5206 // Internal function to stop or delete the stream. 5207 func (mset *stream) stop(deleteFlag, advisory bool) error { 5208 mset.mu.RLock() 5209 js, jsa, name := mset.js, mset.jsa, mset.cfg.Name 5210 mset.mu.RUnlock() 5211 5212 if jsa == nil { 5213 return NewJSNotEnabledForAccountError() 5214 } 5215 5216 // Remove from our account map first. 5217 jsa.mu.Lock() 5218 delete(jsa.streams, name) 5219 accName := jsa.account.Name 5220 jsa.mu.Unlock() 5221 5222 // Mark as closed, kick monitor and collect consumers first. 5223 mset.closed.Store(true) 5224 5225 mset.mu.Lock() 5226 // Signal to the monitor loop. 5227 // Can't use qch here. 5228 if mset.mqch != nil { 5229 close(mset.mqch) 5230 mset.mqch = nil 5231 } 5232 5233 // Stop responding to sync requests. 5234 mset.stopClusterSubs() 5235 // Unsubscribe from direct stream. 5236 mset.unsubscribeToStream(true) 5237 5238 // Our info sub if we spun it up. 5239 if mset.infoSub != nil { 5240 mset.srv.sysUnsubscribe(mset.infoSub) 5241 mset.infoSub = nil 5242 } 5243 5244 // Clean up consumers. 5245 var obs []*consumer 5246 for _, o := range mset.consumers { 5247 obs = append(obs, o) 5248 } 5249 mset.clsMu.Lock() 5250 mset.consumers, mset.cList, mset.csl = nil, nil, nil 5251 mset.clsMu.Unlock() 5252 5253 // Check if we are a mirror. 5254 if mset.mirror != nil && mset.mirror.sub != nil { 5255 mset.unsubscribe(mset.mirror.sub) 5256 mset.mirror.sub = nil 5257 mset.removeInternalConsumer(mset.mirror) 5258 } 5259 // Now check for sources. 5260 if len(mset.sources) > 0 { 5261 for _, si := range mset.sources { 5262 mset.cancelSourceConsumer(si.iname) 5263 } 5264 } 5265 mset.mu.Unlock() 5266 5267 isShuttingDown := js.isShuttingDown() 5268 for _, o := range obs { 5269 if !o.isClosed() { 5270 // Third flag says do not broadcast a signal. 5271 // TODO(dlc) - If we have an err here we don't want to stop 5272 // but should we log? 5273 o.stopWithFlags(deleteFlag, deleteFlag, false, advisory) 5274 if !isShuttingDown { 5275 o.monitorWg.Wait() 5276 } 5277 } 5278 } 5279 5280 mset.mu.Lock() 5281 // Send stream delete advisory after the consumers. 
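	// Teardown ordering sketch for stop(): mark closed, close mqch to stop the
	// monitor, unsubscribe stream and direct subs, stop consumers (waiting on
	// their monitor goroutines unless shutting down), then the advisories
	// below, close qch, raft node Delete/Stop, and finally store Delete/Stop.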
5282 	if deleteFlag && advisory {
5283 		mset.sendDeleteAdvisoryLocked()
5284 	}
5285 
5286 	// Quit channel, do this after sending the delete advisory
5287 	if mset.qch != nil {
5288 		close(mset.qch)
5289 		mset.qch = nil
5290 	}
5291 
5292 	// Cluster cleanup
5293 	var sa *streamAssignment
5294 	if n := mset.node; n != nil {
5295 		if deleteFlag {
5296 			n.Delete()
5297 			sa = mset.sa
5298 		} else {
5299 			// Always attempt snapshot on clean exit.
5300 			n.InstallSnapshot(mset.stateSnapshotLocked())
5301 			n.Stop()
5302 		}
5303 	}
5304 
5305 	// Clean up the duplicates timer if running.
5306 	if mset.ddtmr != nil {
5307 		mset.ddtmr.Stop()
5308 		mset.ddtmr = nil
5309 		mset.ddmap = nil
5310 		mset.ddarr = nil
5311 		mset.ddindex = 0
5312 	}
5313 
5314 	sysc := mset.sysc
5315 	mset.sysc = nil
5316 
5317 	if deleteFlag {
5318 		// Unregistering ipQueues does not prevent them from push/pop,
5319 		// it just removes them from the central monitoring map.
5320 		mset.msgs.unregister()
5321 		mset.ackq.unregister()
5322 		mset.outq.unregister()
5323 		mset.sigq.unregister()
5324 	}
5325 
5326 	// Snapshot the store and client references.
5327 	store := mset.store
5328 	c := mset.client
5329 
5330 	// Clustered cleanup.
5331 	mset.mu.Unlock()
5332 
5333 	// Check if the stream assignment has the group node specified.
5334 	// We need this cleared in case the stream gets reassigned here.
5335 	if sa != nil {
5336 		js.mu.Lock()
5337 		if sa.Group != nil {
5338 			sa.Group.node = nil
5339 		}
5340 		js.mu.Unlock()
5341 	}
5342 
5343 	if c != nil {
5344 		c.closeConnection(ClientClosed)
5345 	}
5346 
5347 	if sysc != nil {
5348 		sysc.closeConnection(ClientClosed)
5349 	}
5350 
5351 	if deleteFlag {
5352 		if store != nil {
5353 			// Ignore errors.
5354 			store.Delete()
5355 		}
5356 		// Release any resources.
5357 		js.releaseStreamResources(&mset.cfg)
5358 		// Clean up directories after the stream.
5359 		accDir := filepath.Join(js.config.StoreDir, accName)
5360 		// Do cleanup in a separate go routine, similar to how fs will use purge here.
5361 		go func() {
5362 			// These are no-ops if the directories are not empty.
5363 			os.Remove(filepath.Join(accDir, streamsDir))
5364 			os.Remove(accDir)
5365 		}()
5366 	} else if store != nil {
5367 		// Ignore errors.
5368 		store.Stop()
5369 	}
5370 
5371 	return nil
5372 }
5373 
5374 func (mset *stream) getMsg(seq uint64) (*StoredMsg, error) {
5375 	var smv StoreMsg
5376 	sm, err := mset.store.LoadMsg(seq, &smv)
5377 	if err != nil {
5378 		return nil, err
5379 	}
5380 	// This is only used directly in tests so no need to pool etc.
5381 	return &StoredMsg{
5382 		Subject:  sm.subj,
5383 		Sequence: sm.seq,
5384 		Header:   sm.hdr,
5385 		Data:     sm.msg,
5386 		Time:     time.Unix(0, sm.ts).UTC(),
5387 	}, nil
5388 }
5389 
5390 // getConsumers will return a copy of all the current consumers for this stream.
5391 func (mset *stream) getConsumers() []*consumer {
5392 	mset.clsMu.RLock()
5393 	defer mset.clsMu.RUnlock()
5394 	return append([]*consumer(nil), mset.cList...)
5395 }
5396 
5397 // Lock should be held for this one.
5398 func (mset *stream) numPublicConsumers() int {
5399 	return len(mset.consumers) - mset.directs
5400 }
5401 
5402 // This returns all consumers that are not DIRECT.
5403 func (mset *stream) getPublicConsumers() []*consumer {
5404 	mset.clsMu.RLock()
5405 	defer mset.clsMu.RUnlock()
5406 
5407 	var obs []*consumer
5408 	for _, o := range mset.cList {
5409 		if !o.cfg.Direct {
5410 			obs = append(obs, o)
5411 		}
5412 	}
5413 	return obs
5414 }
5415 
5416 func (mset *stream) isInterestRetention() bool {
5417 	mset.mu.RLock()
5418 	defer mset.mu.RUnlock()
5419 	return mset.cfg.Retention != LimitsPolicy
5420 }
5421 
5422 // NumConsumers reports the number of active consumers for this stream.
5423 func (mset *stream) numConsumers() int {
5424 	mset.mu.RLock()
5425 	defer mset.mu.RUnlock()
5426 	return len(mset.consumers)
5427 }
5428 
5429 // Lock should be held.
5430 func (mset *stream) setConsumer(o *consumer) {
5431 	mset.consumers[o.name] = o
5432 	if len(o.subjf) > 0 {
5433 		mset.numFilter++
5434 	}
5435 	if o.cfg.Direct {
5436 		mset.directs++
5437 	}
5438 	// Now update the consumers list as well.
5439 	mset.clsMu.Lock()
5440 	mset.cList = append(mset.cList, o)
5441 	mset.clsMu.Unlock()
5442 }
5443 
5444 // Lock should be held.
5445 func (mset *stream) removeConsumer(o *consumer) {
5446 	if o.cfg.FilterSubject != _EMPTY_ && mset.numFilter > 0 {
5447 		mset.numFilter--
5448 	}
5449 	if o.cfg.Direct && mset.directs > 0 {
5450 		mset.directs--
5451 	}
5452 	if mset.consumers != nil {
5453 		delete(mset.consumers, o.name)
5454 		// Now update the consumers list as well.
5455 		mset.clsMu.Lock()
5456 		for i, ol := range mset.cList {
5457 			if ol == o {
5458 				mset.cList = append(mset.cList[:i], mset.cList[i+1:]...)
5459 				break
5460 			}
5461 		}
5462 		// Always remove from the leader sublist.
5463 		if mset.csl != nil {
5464 			for _, sub := range o.signalSubs() {
5465 				mset.csl.Remove(sub)
5466 			}
5467 		}
5468 		mset.clsMu.Unlock()
5469 	}
5470 }
5471 
5472 // Set the consumer as a leader. This will update the signaling sublist.
5473 func (mset *stream) setConsumerAsLeader(o *consumer) {
5474 	mset.clsMu.Lock()
5475 	defer mset.clsMu.Unlock()
5476 
5477 	if mset.csl == nil {
5478 		mset.csl = NewSublistWithCache()
5479 	}
5480 	for _, sub := range o.signalSubs() {
5481 		mset.csl.Insert(sub)
5482 	}
5483 }
5484 
5485 // Remove the consumer as a leader. This will update the signaling sublist.
5486 func (mset *stream) removeConsumerAsLeader(o *consumer) {
5487 	mset.clsMu.Lock()
5488 	defer mset.clsMu.Unlock()
5489 	if mset.csl != nil {
5490 		for _, sub := range o.signalSubs() {
5491 			mset.csl.Remove(sub)
5492 		}
5493 	}
5494 }
5495 
5496 // swapSigSubs will update signal subs for a new subject filter.
5497 // Consumer lock should not be held.
5498 func (mset *stream) swapSigSubs(o *consumer, newFilters []string) {
5499 	mset.clsMu.Lock()
5500 	o.mu.Lock()
5501 
5502 	if o.closed || o.mset == nil {
5503 		o.mu.Unlock()
		mset.clsMu.Unlock()
5504 		return
5505 	}
5506 
5507 	if o.sigSubs != nil {
5508 		if mset.csl != nil {
5509 			for _, sub := range o.sigSubs {
5510 				mset.csl.Remove(sub)
5511 			}
5512 		}
5513 		o.sigSubs = nil
5514 	}
5515 
5516 	if o.isLeader() {
5517 		if mset.csl == nil {
5518 			mset.csl = NewSublistWithCache()
5519 		}
5520 		// If no filters are present, add fwcs to the sublist for this consumer.
5521 		if newFilters == nil {
5522 			sub := &subscription{subject: []byte(fwcs), icb: o.processStreamSignal}
5523 			mset.csl.Insert(sub)
5524 			o.sigSubs = append(o.sigSubs, sub)
5525 			// If there are filters, add their subjects to the sublist.
5526 } else { 5527 for _, filter := range newFilters { 5528 sub := &subscription{subject: []byte(filter), icb: o.processStreamSignal} 5529 mset.csl.Insert(sub) 5530 o.sigSubs = append(o.sigSubs, sub) 5531 } 5532 } 5533 } 5534 o.mu.Unlock() 5535 mset.clsMu.Unlock() 5536 5537 mset.mu.Lock() 5538 defer mset.mu.Unlock() 5539 5540 if mset.numFilter > 0 && len(o.subjf) > 0 { 5541 mset.numFilter-- 5542 } 5543 if len(newFilters) > 0 { 5544 mset.numFilter++ 5545 } 5546 } 5547 5548 // lookupConsumer will retrieve a consumer by name. 5549 func (mset *stream) lookupConsumer(name string) *consumer { 5550 mset.mu.RLock() 5551 defer mset.mu.RUnlock() 5552 return mset.consumers[name] 5553 } 5554 5555 func (mset *stream) numDirectConsumers() (num int) { 5556 mset.clsMu.RLock() 5557 defer mset.clsMu.RUnlock() 5558 5559 // Consumers that are direct are not recorded at the store level. 5560 for _, o := range mset.cList { 5561 o.mu.RLock() 5562 if o.cfg.Direct { 5563 num++ 5564 } 5565 o.mu.RUnlock() 5566 } 5567 return num 5568 } 5569 5570 // State will return the current state for this stream. 5571 func (mset *stream) state() StreamState { 5572 return mset.stateWithDetail(false) 5573 } 5574 5575 func (mset *stream) stateWithDetail(details bool) StreamState { 5576 // mset.store does not change once set, so ok to reference here directly. 5577 // We do this elsewhere as well. 5578 store := mset.store 5579 if store == nil { 5580 return StreamState{} 5581 } 5582 5583 // Currently rely on store for details. 5584 if details { 5585 return store.State() 5586 } 5587 // Here we do the fast version. 5588 var state StreamState 5589 store.FastState(&state) 5590 return state 5591 } 5592 5593 func (mset *stream) Store() StreamStore { 5594 mset.mu.RLock() 5595 defer mset.mu.RUnlock() 5596 return mset.store 5597 } 5598 5599 // Determines if the new proposed partition is unique amongst all consumers. 5600 // Lock should be held. 5601 func (mset *stream) partitionUnique(name string, partitions []string) bool { 5602 for _, partition := range partitions { 5603 psa := [32]string{} 5604 pts := tokenizeSubjectIntoSlice(psa[:0], partition) 5605 for n, o := range mset.consumers { 5606 // Skip the consumer being checked. 5607 if n == name { 5608 continue 5609 } 5610 if o.subjf == nil { 5611 return false 5612 } 5613 for _, filter := range o.subjf { 5614 if isSubsetMatchTokenized(pts, filter.tokenizedSubject) || 5615 isSubsetMatchTokenized(filter.tokenizedSubject, pts) { 5616 return false 5617 } 5618 } 5619 } 5620 } 5621 return true 5622 } 5623 5624 // Lock should be held. 5625 func (mset *stream) potentialFilteredConsumers() bool { 5626 numSubjects := len(mset.cfg.Subjects) 5627 if len(mset.consumers) == 0 || numSubjects == 0 { 5628 return false 5629 } 5630 if numSubjects > 1 || subjectHasWildcard(mset.cfg.Subjects[0]) { 5631 return true 5632 } 5633 return false 5634 } 5635 5636 // Check if there is no interest in this sequence number across our consumers. 5637 // The consumer passed is optional if we are processing the ack for that consumer. 5638 // Write lock should be held. 5639 func (mset *stream) noInterest(seq uint64, obs *consumer) bool { 5640 return !mset.checkForInterest(seq, obs) 5641 } 5642 5643 // Check if there is no interest in this sequence number and subject across our consumers. 5644 // The consumer passed is optional if we are processing the ack for that consumer. 5645 // Write lock should be held. 
// Check if there is no interest in this sequence number and subject across our consumers.
// The consumer passed is optional if we are processing the ack for that consumer.
// Write lock should be held.
func (mset *stream) noInterestWithSubject(seq uint64, subj string, obs *consumer) bool {
	return !mset.checkForInterestWithSubject(seq, subj, obs)
}

// Write lock should be held here for the stream to avoid race conditions on state.
func (mset *stream) checkForInterest(seq uint64, obs *consumer) bool {
	var subj string
	if mset.potentialFilteredConsumers() {
		pmsg := getJSPubMsgFromPool()
		defer pmsg.returnToPool()
		sm, err := mset.store.LoadMsg(seq, &pmsg.StoreMsg)
		if err != nil {
			if err == ErrStoreEOF {
				// Register this as a preAck.
				mset.registerPreAck(obs, seq)
				return true
			}
			mset.clearAllPreAcks(seq)
			return false
		}
		subj = sm.subj
	}
	return mset.checkForInterestWithSubject(seq, subj, obs)
}

// Checks for interest given a sequence and subject.
func (mset *stream) checkForInterestWithSubject(seq uint64, subj string, obs *consumer) bool {
	for _, o := range mset.consumers {
		// If this is us, or we have a registered preAck for this consumer, continue inspecting.
		if o == obs || mset.hasPreAck(o, seq) {
			continue
		}
		// Check if we need an ack.
		if o.needAck(seq, subj) {
			return true
		}
	}
	mset.clearAllPreAcks(seq)
	return false
}

// Check if we have a pre-registered ack for this sequence.
// Write lock should be held.
func (mset *stream) hasPreAck(o *consumer, seq uint64) bool {
	if o == nil || len(mset.preAcks) == 0 {
		return false
	}
	consumers := mset.preAcks[seq]
	if len(consumers) == 0 {
		return false
	}
	_, found := consumers[o]
	return found
}

// Check if we have all consumers pre-acked for this sequence and subject.
// Write lock should be held.
func (mset *stream) hasAllPreAcks(seq uint64, subj string) bool {
	if len(mset.preAcks) == 0 || len(mset.preAcks[seq]) == 0 {
		return false
	}
	// Since these can be filtered and mutually exclusive,
	// if we have some preAcks we need to check all interest here.
	return mset.noInterestWithSubject(seq, subj, nil)
}

// Clear all preAcks for this sequence.
// Write lock should be held.
func (mset *stream) clearAllPreAcks(seq uint64) {
	delete(mset.preAcks, seq)
}

// Clear all preAcks below the given floor.
// Write lock should be held.
func (mset *stream) clearAllPreAcksBelowFloor(floor uint64) {
	for seq := range mset.preAcks {
		if seq < floor {
			delete(mset.preAcks, seq)
		}
	}
}

// This will register an ack for a consumer if it arrives before the actual message,
// acquiring the stream write lock first.
func (mset *stream) registerPreAckLock(o *consumer, seq uint64) {
	mset.mu.Lock()
	defer mset.mu.Unlock()
	mset.registerPreAck(o, seq)
}

// This will register an ack for a consumer if it arrives before
// the actual message.
// Write lock should be held.
func (mset *stream) registerPreAck(o *consumer, seq uint64) {
	if o == nil {
		return
	}
	if mset.preAcks == nil {
		mset.preAcks = make(map[uint64]map[*consumer]struct{})
	}
	if mset.preAcks[seq] == nil {
		mset.preAcks[seq] = make(map[*consumer]struct{})
	}
	mset.preAcks[seq][o] = struct{}{}
}

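// Illustrative sketch, not part of the server: the preAcks index is a
// two-level map keyed by sequence, then by consumer, so membership checks
// and per-consumer clears are O(1). Values below are hypothetical.
//
//	preAcks := make(map[uint64]map[*consumer]struct{})
//	preAcks[22] = map[*consumer]struct{}{o1: {}, o2: {}}
//	_, acked := preAcks[22][o1] // true: o1 already acked seq 22
//	delete(preAcks[22], o1)     // clearPreAck for a single consumer
//	delete(preAcks, 22)         // clearAllPreAcks for the sequence
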
// This will clear a preAck for a single consumer.
// Write lock should be held.
func (mset *stream) clearPreAck(o *consumer, seq uint64) {
	if o == nil || len(mset.preAcks) == 0 {
		return
	}
	if consumers := mset.preAcks[seq]; len(consumers) > 0 {
		delete(consumers, o)
		if len(consumers) == 0 {
			delete(mset.preAcks, seq)
		}
	}
}

// ackMsg is called by a consumer when we have a WorkQueue or Interest retention policy.
func (mset *stream) ackMsg(o *consumer, seq uint64) {
	if seq == 0 {
		return
	}

	// Don't make this RLock(). We need to have only 1 running at a time to gauge interest across all consumers.
	mset.mu.Lock()
	if mset.closed.Load() || mset.cfg.Retention == LimitsPolicy {
		mset.mu.Unlock()
		return
	}

	var state StreamState
	mset.store.FastState(&state)

	// Make sure this sequence is not below our first sequence.
	if seq < state.FirstSeq {
		mset.clearPreAck(o, seq)
		mset.mu.Unlock()
		return
	}

	// If this ack has arrived before we have processed the message itself.
	if seq > state.LastSeq {
		mset.registerPreAck(o, seq)
		mset.mu.Unlock()
		return
	}

	var shouldRemove bool
	switch mset.cfg.Retention {
	case WorkQueuePolicy:
		// Normally we just remove a message when it is acked here, but if we have direct consumers
		// from sources and/or mirrors we need to make sure they have delivered the msg.
		shouldRemove = mset.directs <= 0 || mset.noInterest(seq, o)
	case InterestPolicy:
		shouldRemove = mset.noInterest(seq, o)
	}
	mset.mu.Unlock()

	// If there is nothing else to do.
	if !shouldRemove {
		return
	}

	// If we are here we should attempt to remove the message.
	if _, err := mset.store.RemoveMsg(seq); err == ErrStoreEOF {
		// This should not happen, but being pedantic.
		mset.registerPreAckLock(o, seq)
	}
}

// Snapshot creates a snapshot for the stream and possibly consumers.
func (mset *stream) snapshot(deadline time.Duration, checkMsgs, includeConsumers bool) (*SnapshotResult, error) {
	if mset.closed.Load() {
		return nil, errStreamClosed
	}
	store := mset.store
	return store.Snapshot(deadline, checkMsgs, includeConsumers)
}

const snapsDir = "__snapshots__"

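// Illustrative sketch, not part of the server: a stream snapshot is an
// s2-compressed tar archive of regular files. A minimal, hypothetical
// writer producing an archive shaped like what RestoreStream below unpacks,
// given some fcfg FileStreamInfo:
//
//	var buf bytes.Buffer
//	enc := s2.NewWriter(&buf)
//	tw := tar.NewWriter(enc)
//	meta, _ := json.Marshal(fcfg) // becomes JetStreamMetaFile in the archive
//	tw.WriteHeader(&tar.Header{Name: JetStreamMetaFile, Typeflag: tar.TypeReg, Mode: 0600, Size: int64(len(meta))})
//	tw.Write(meta)
//	tw.Close()
//	enc.Close()
//	// buf now holds a snapshot-shaped stream for (*Account).RestoreStream.
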
// RestoreStream will restore a stream from a snapshot.
func (a *Account) RestoreStream(ncfg *StreamConfig, r io.Reader) (*stream, error) {
	if ncfg == nil {
		return nil, errors.New("nil config on stream restore")
	}

	s, jsa, err := a.checkForJetStream()
	if err != nil {
		return nil, err
	}

	cfg, apiErr := s.checkStreamCfg(ncfg, a)
	if apiErr != nil {
		return nil, apiErr
	}

	sd := filepath.Join(jsa.storeDir, snapsDir)
	if _, err := os.Stat(sd); os.IsNotExist(err) {
		if err := os.MkdirAll(sd, defaultDirPerms); err != nil {
			return nil, fmt.Errorf("could not create snapshots directory - %v", err)
		}
	}
	sdir, err := os.MkdirTemp(sd, "snap-")
	if err != nil {
		return nil, err
	}
	if _, err := os.Stat(sdir); os.IsNotExist(err) {
		if err := os.MkdirAll(sdir, defaultDirPerms); err != nil {
			return nil, fmt.Errorf("could not create snapshots directory - %v", err)
		}
	}
	defer os.RemoveAll(sdir)

	logAndReturnError := func() error {
		a.mu.RLock()
		err := fmt.Errorf("unexpected content (account=%s)", a.Name)
		if a.srv != nil {
			a.srv.Errorf("Stream restore failed due to %v", err)
		}
		a.mu.RUnlock()
		return err
	}
	sdirCheck := filepath.Clean(sdir) + string(os.PathSeparator)

	tr := tar.NewReader(s2.NewReader(r))
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break // End of snapshot
		}
		if err != nil {
			return nil, err
		}
		if hdr.Typeflag != tar.TypeReg {
			return nil, logAndReturnError()
		}
		fpath := filepath.Join(sdir, filepath.Clean(hdr.Name))
		if !strings.HasPrefix(fpath, sdirCheck) {
			return nil, logAndReturnError()
		}
		os.MkdirAll(filepath.Dir(fpath), defaultDirPerms)
		fd, err := os.OpenFile(fpath, os.O_CREATE|os.O_RDWR, 0600)
		if err != nil {
			return nil, err
		}
		_, err = io.Copy(fd, tr)
		fd.Close()
		if err != nil {
			return nil, err
		}
	}

	// Check metadata.
	// The cfg passed in will be the new identity for the stream.
	var fcfg FileStreamInfo
	b, err := os.ReadFile(filepath.Join(sdir, JetStreamMetaFile))
	if err != nil {
		return nil, err
	}
	if err := json.Unmarshal(b, &fcfg); err != nil {
		return nil, err
	}

	// Check to make sure the names match.
	if fcfg.Name != cfg.Name {
		return nil, errors.New("stream names do not match")
	}

	// See if this stream already exists.
	if _, err := a.lookupStream(cfg.Name); err == nil {
		return nil, NewJSStreamNameExistRestoreFailedError()
	}
	// Move into the correct place here.
	ndir := filepath.Join(jsa.storeDir, streamsDir, cfg.Name)
	// Remove the old one if for some reason it is still here.
	if _, err := os.Stat(ndir); err == nil {
		os.RemoveAll(ndir)
	}
	// Make sure our destination streams directory exists.
	if err := os.MkdirAll(filepath.Join(jsa.storeDir, streamsDir), defaultDirPerms); err != nil {
		return nil, err
	}
	// Move into the new location.
	if err := os.Rename(sdir, ndir); err != nil {
		return nil, err
	}

	if cfg.Template != _EMPTY_ {
		if err := jsa.addStreamNameToTemplate(cfg.Template, cfg.Name); err != nil {
			return nil, err
		}
	}
	mset, err := a.addStream(&cfg)
	if err != nil {
		return nil, err
	}
	if !fcfg.Created.IsZero() {
		mset.setCreatedTime(fcfg.Created)
	}
	lseq := mset.lastSeq()

	// Make sure we do an update if the configs have changed.
	if !reflect.DeepEqual(fcfg.StreamConfig, cfg) {
		if err := mset.update(&cfg); err != nil {
			return nil, err
		}
	}

	// Now do the consumers.
	odir := filepath.Join(ndir, consumerDir)
	ofis, _ := os.ReadDir(odir)
	for _, ofi := range ofis {
		metafile := filepath.Join(odir, ofi.Name(), JetStreamMetaFile)
		metasum := filepath.Join(odir, ofi.Name(), JetStreamMetaFileSum)
		if _, err := os.Stat(metafile); os.IsNotExist(err) {
			mset.stop(true, false)
			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
		}
		buf, err := os.ReadFile(metafile)
		if err != nil {
			mset.stop(true, false)
			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
		}
		if _, err := os.Stat(metasum); os.IsNotExist(err) {
			mset.stop(true, false)
			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
		}
		var cfg FileConsumerInfo
		if err := json.Unmarshal(buf, &cfg); err != nil {
			mset.stop(true, false)
			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
		}
		isEphemeral := !isDurableConsumer(&cfg.ConsumerConfig)
		if isEphemeral {
			// This is an ephemeral consumer and this could fail on restart until
			// the consumer can reconnect. We will create it as a durable and switch it.
			cfg.ConsumerConfig.Durable = ofi.Name()
		}
		obs, err := mset.addConsumer(&cfg.ConsumerConfig)
		if err != nil {
			mset.stop(true, false)
			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
		}
		if isEphemeral {
			obs.switchToEphemeral()
		}
		if !cfg.Created.IsZero() {
			obs.setCreatedTime(cfg.Created)
		}
		obs.mu.Lock()
		err = obs.readStoredState(lseq)
		obs.mu.Unlock()
		if err != nil {
			mset.stop(true, false)
			return nil, fmt.Errorf("error restoring consumer [%q]: %v", ofi.Name(), err)
		}
	}
	return mset, nil
}

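// Illustrative sketch, not part of the server: a snapshot/restore round
// trip, assuming SnapshotResult exposes a Reader field and the stream
// config accessor is mset.config(). Note RestoreStream fails with
// NewJSStreamNameExistRestoreFailedError if the stream still exists, so
// the original would have to be deleted first.
//
//	sr, err := mset.snapshot(5*time.Second, true, true)
//	if err != nil {
//		return err
//	}
//	cfg := mset.config()
//	// ... delete the original stream ...
//	restored, err := acc.RestoreStream(&cfg, sr.Reader)
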
// This is to check for dangling messages on interest retention streams. Only called on account enable.
// Issue https://github.com/nats-io/nats-server/issues/3612
func (mset *stream) checkForOrphanMsgs() {
	mset.mu.RLock()
	consumers := make([]*consumer, 0, len(mset.consumers))
	for _, o := range mset.consumers {
		consumers = append(consumers, o)
	}
	accName, stream := mset.acc.Name, mset.cfg.Name

	var ss StreamState
	mset.store.FastState(&ss)
	mset.mu.RUnlock()

	for _, o := range consumers {
		if err := o.checkStateForInterestStream(); err == errAckFloorHigherThanLastSeq {
			o.mu.RLock()
			s, consumer := o.srv, o.name
			state, _ := o.store.State()
			asflr := state.AckFloor.Stream
			o.mu.RUnlock()
			// Warn about stream state vs our ack floor.
			s.RateLimitWarnf("Detected consumer '%s > %s > %s' ack floor %d is ahead of stream's last sequence %d",
				accName, stream, consumer, asflr, ss.LastSeq)
		}
	}
}

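// Illustrative sketch, not part of the server: the invariant checked above.
// On an interest or workqueue stream, a consumer's ack floor (the highest
// contiguously acked stream sequence) should never exceed the stream's last
// sequence. The values below are hypothetical.
//
//	var ss StreamState
//	mset.store.FastState(&ss)   // e.g. ss.LastSeq == 100
//	state, _ := o.store.State() // e.g. state.AckFloor.Stream == 105
//	if state.AckFloor.Stream > ss.LastSeq {
//		// Dangling state: acks recorded for messages beyond what the
//		// stream reports; surfaced as errAckFloorHigherThanLastSeq.
//	}
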
// Check on startup to make sure that each consumer's replication matches ours.
// Interest retention requires replication to match.
func (mset *stream) checkConsumerReplication() {
	mset.mu.RLock()
	defer mset.mu.RUnlock()

	if mset.cfg.Retention != InterestPolicy {
		return
	}

	s, acc := mset.srv, mset.acc
	for _, o := range mset.consumers {
		o.mu.RLock()
		// A consumer replicas value of 0 can be a legit config, and we will inherit
		// from the stream when this is the case.
		if mset.cfg.Replicas != o.cfg.Replicas && o.cfg.Replicas != 0 {
			s.Errorf("consumer '%s > %s > %s' MUST match replication (%d vs %d) of stream with interest policy",
				acc, mset.cfg.Name, o.cfg.Name, mset.cfg.Replicas, o.cfg.Replicas)
		}
		o.mu.RUnlock()
	}
}

// Will check if we are already running in the monitor routine and, if not, set the appropriate flag.
func (mset *stream) checkInMonitor() bool {
	mset.mu.Lock()
	defer mset.mu.Unlock()

	if mset.inMonitor {
		return true
	}
	mset.inMonitor = true
	return false
}

// Clear us being in the monitor routine.
func (mset *stream) clearMonitorRunning() {
	mset.mu.Lock()
	defer mset.mu.Unlock()
	mset.inMonitor = false
}

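// Illustrative sketch, not part of the server: the intended test-and-set
// idiom for the monitor flag above. The body of the monitor loop is
// hypothetical.
//
//	if mset.checkInMonitor() {
//		return // a monitor routine is already running for this stream
//	}
//	defer mset.clearMonitorRunning()
//	// ... run the monitor loop ...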