github.com/nsqio/nsq@v1.3.0/nsqd/channel.go (about) 1 package nsqd 2 3 import ( 4 "container/heap" 5 "errors" 6 "fmt" 7 "math" 8 "strings" 9 "sync" 10 "sync/atomic" 11 "time" 12 13 "github.com/nsqio/go-diskqueue" 14 15 "github.com/nsqio/nsq/internal/lg" 16 "github.com/nsqio/nsq/internal/pqueue" 17 "github.com/nsqio/nsq/internal/quantile" 18 ) 19 20 type Consumer interface { 21 UnPause() 22 Pause() 23 Close() error 24 TimedOutMessage() 25 Stats(string) ClientStats 26 Empty() 27 } 28 29 // Channel represents the concrete type for a NSQ channel (and also 30 // implements the Queue interface) 31 // 32 // There can be multiple channels per topic, each with there own unique set 33 // of subscribers (clients). 34 // 35 // Channels maintain all client and message metadata, orchestrating in-flight 36 // messages, timeouts, requeuing, etc. 37 type Channel struct { 38 // 64bit atomic vars need to be first for proper alignment on 32bit platforms 39 requeueCount uint64 40 messageCount uint64 41 timeoutCount uint64 42 43 sync.RWMutex 44 45 topicName string 46 name string 47 nsqd *NSQD 48 49 backend BackendQueue 50 51 memoryMsgChan chan *Message 52 exitFlag int32 53 exitMutex sync.RWMutex 54 55 // state tracking 56 clients map[int64]Consumer 57 paused int32 58 ephemeral bool 59 deleteCallback func(*Channel) 60 deleter sync.Once 61 62 // Stats tracking 63 e2eProcessingLatencyStream *quantile.Quantile 64 65 // TODO: these can be DRYd up 66 deferredMessages map[MessageID]*pqueue.Item 67 deferredPQ pqueue.PriorityQueue 68 deferredMutex sync.Mutex 69 inFlightMessages map[MessageID]*Message 70 inFlightPQ inFlightPqueue 71 inFlightMutex sync.Mutex 72 } 73 74 // NewChannel creates a new instance of the Channel type and returns a pointer 75 func NewChannel(topicName string, channelName string, nsqd *NSQD, 76 deleteCallback func(*Channel)) *Channel { 77 78 c := &Channel{ 79 topicName: topicName, 80 name: channelName, 81 memoryMsgChan: nil, 82 clients: make(map[int64]Consumer), 83 deleteCallback: deleteCallback, 84 nsqd: nsqd, 85 ephemeral: strings.HasSuffix(channelName, "#ephemeral"), 86 } 87 // avoid mem-queue if size == 0 for more consistent ordering 88 if nsqd.getOpts().MemQueueSize > 0 || c.ephemeral { 89 c.memoryMsgChan = make(chan *Message, nsqd.getOpts().MemQueueSize) 90 } 91 if len(nsqd.getOpts().E2EProcessingLatencyPercentiles) > 0 { 92 c.e2eProcessingLatencyStream = quantile.New( 93 nsqd.getOpts().E2EProcessingLatencyWindowTime, 94 nsqd.getOpts().E2EProcessingLatencyPercentiles, 95 ) 96 } 97 98 c.initPQ() 99 100 if c.ephemeral { 101 c.backend = newDummyBackendQueue() 102 } else { 103 dqLogf := func(level diskqueue.LogLevel, f string, args ...interface{}) { 104 opts := nsqd.getOpts() 105 lg.Logf(opts.Logger, opts.LogLevel, lg.LogLevel(level), f, args...) 106 } 107 // backend names, for uniqueness, automatically include the topic... 108 backendName := getBackendName(topicName, channelName) 109 c.backend = diskqueue.New( 110 backendName, 111 nsqd.getOpts().DataPath, 112 nsqd.getOpts().MaxBytesPerFile, 113 int32(minValidMsgLength), 114 int32(nsqd.getOpts().MaxMsgSize)+minValidMsgLength, 115 nsqd.getOpts().SyncEvery, 116 nsqd.getOpts().SyncTimeout, 117 dqLogf, 118 ) 119 } 120 121 c.nsqd.Notify(c, !c.ephemeral) 122 123 return c 124 } 125 126 func (c *Channel) initPQ() { 127 pqSize := int(math.Max(1, float64(c.nsqd.getOpts().MemQueueSize)/10)) 128 129 c.inFlightMutex.Lock() 130 c.inFlightMessages = make(map[MessageID]*Message) 131 c.inFlightPQ = newInFlightPqueue(pqSize) 132 c.inFlightMutex.Unlock() 133 134 c.deferredMutex.Lock() 135 c.deferredMessages = make(map[MessageID]*pqueue.Item) 136 c.deferredPQ = pqueue.New(pqSize) 137 c.deferredMutex.Unlock() 138 } 139 140 // Exiting returns a boolean indicating if this channel is closed/exiting 141 func (c *Channel) Exiting() bool { 142 return atomic.LoadInt32(&c.exitFlag) == 1 143 } 144 145 // Delete empties the channel and closes 146 func (c *Channel) Delete() error { 147 return c.exit(true) 148 } 149 150 // Close cleanly closes the Channel 151 func (c *Channel) Close() error { 152 return c.exit(false) 153 } 154 155 func (c *Channel) exit(deleted bool) error { 156 c.exitMutex.Lock() 157 defer c.exitMutex.Unlock() 158 159 if !atomic.CompareAndSwapInt32(&c.exitFlag, 0, 1) { 160 return errors.New("exiting") 161 } 162 163 if deleted { 164 c.nsqd.logf(LOG_INFO, "CHANNEL(%s): deleting", c.name) 165 166 // since we are explicitly deleting a channel (not just at system exit time) 167 // de-register this from the lookupd 168 c.nsqd.Notify(c, !c.ephemeral) 169 } else { 170 c.nsqd.logf(LOG_INFO, "CHANNEL(%s): closing", c.name) 171 } 172 173 // this forceably closes client connections 174 c.RLock() 175 for _, client := range c.clients { 176 client.Close() 177 } 178 c.RUnlock() 179 180 if deleted { 181 // empty the queue (deletes the backend files, too) 182 c.Empty() 183 return c.backend.Delete() 184 } 185 186 // write anything leftover to disk 187 c.flush() 188 return c.backend.Close() 189 } 190 191 func (c *Channel) Empty() error { 192 c.Lock() 193 defer c.Unlock() 194 195 c.initPQ() 196 for _, client := range c.clients { 197 client.Empty() 198 } 199 200 for { 201 select { 202 case <-c.memoryMsgChan: 203 default: 204 goto finish 205 } 206 } 207 208 finish: 209 return c.backend.Empty() 210 } 211 212 // flush persists all the messages in internal memory buffers to the backend 213 // it does not drain inflight/deferred because it is only called in Close() 214 func (c *Channel) flush() error { 215 if len(c.memoryMsgChan) > 0 || len(c.inFlightMessages) > 0 || len(c.deferredMessages) > 0 { 216 c.nsqd.logf(LOG_INFO, "CHANNEL(%s): flushing %d memory %d in-flight %d deferred messages to backend", 217 c.name, len(c.memoryMsgChan), len(c.inFlightMessages), len(c.deferredMessages)) 218 } 219 220 for { 221 select { 222 case msg := <-c.memoryMsgChan: 223 err := writeMessageToBackend(msg, c.backend) 224 if err != nil { 225 c.nsqd.logf(LOG_ERROR, "failed to write message to backend - %s", err) 226 } 227 default: 228 goto finish 229 } 230 } 231 232 finish: 233 c.inFlightMutex.Lock() 234 for _, msg := range c.inFlightMessages { 235 err := writeMessageToBackend(msg, c.backend) 236 if err != nil { 237 c.nsqd.logf(LOG_ERROR, "failed to write message to backend - %s", err) 238 } 239 } 240 c.inFlightMutex.Unlock() 241 242 c.deferredMutex.Lock() 243 for _, item := range c.deferredMessages { 244 msg := item.Value.(*Message) 245 err := writeMessageToBackend(msg, c.backend) 246 if err != nil { 247 c.nsqd.logf(LOG_ERROR, "failed to write message to backend - %s", err) 248 } 249 } 250 c.deferredMutex.Unlock() 251 252 return nil 253 } 254 255 func (c *Channel) Depth() int64 { 256 return int64(len(c.memoryMsgChan)) + c.backend.Depth() 257 } 258 259 func (c *Channel) Pause() error { 260 return c.doPause(true) 261 } 262 263 func (c *Channel) UnPause() error { 264 return c.doPause(false) 265 } 266 267 func (c *Channel) doPause(pause bool) error { 268 if pause { 269 atomic.StoreInt32(&c.paused, 1) 270 } else { 271 atomic.StoreInt32(&c.paused, 0) 272 } 273 274 c.RLock() 275 for _, client := range c.clients { 276 if pause { 277 client.Pause() 278 } else { 279 client.UnPause() 280 } 281 } 282 c.RUnlock() 283 return nil 284 } 285 286 func (c *Channel) IsPaused() bool { 287 return atomic.LoadInt32(&c.paused) == 1 288 } 289 290 // PutMessage writes a Message to the queue 291 func (c *Channel) PutMessage(m *Message) error { 292 c.exitMutex.RLock() 293 defer c.exitMutex.RUnlock() 294 if c.Exiting() { 295 return errors.New("exiting") 296 } 297 err := c.put(m) 298 if err != nil { 299 return err 300 } 301 atomic.AddUint64(&c.messageCount, 1) 302 return nil 303 } 304 305 func (c *Channel) put(m *Message) error { 306 select { 307 case c.memoryMsgChan <- m: 308 default: 309 err := writeMessageToBackend(m, c.backend) 310 c.nsqd.SetHealth(err) 311 if err != nil { 312 c.nsqd.logf(LOG_ERROR, "CHANNEL(%s): failed to write message to backend - %s", 313 c.name, err) 314 return err 315 } 316 } 317 return nil 318 } 319 320 func (c *Channel) PutMessageDeferred(msg *Message, timeout time.Duration) { 321 atomic.AddUint64(&c.messageCount, 1) 322 c.StartDeferredTimeout(msg, timeout) 323 } 324 325 // TouchMessage resets the timeout for an in-flight message 326 func (c *Channel) TouchMessage(clientID int64, id MessageID, clientMsgTimeout time.Duration) error { 327 msg, err := c.popInFlightMessage(clientID, id) 328 if err != nil { 329 return err 330 } 331 c.removeFromInFlightPQ(msg) 332 333 newTimeout := time.Now().Add(clientMsgTimeout) 334 if newTimeout.Sub(msg.deliveryTS) >= 335 c.nsqd.getOpts().MaxMsgTimeout { 336 // we would have gone over, set to the max 337 newTimeout = msg.deliveryTS.Add(c.nsqd.getOpts().MaxMsgTimeout) 338 } 339 340 msg.pri = newTimeout.UnixNano() 341 err = c.pushInFlightMessage(msg) 342 if err != nil { 343 return err 344 } 345 c.addToInFlightPQ(msg) 346 return nil 347 } 348 349 // FinishMessage successfully discards an in-flight message 350 func (c *Channel) FinishMessage(clientID int64, id MessageID) error { 351 msg, err := c.popInFlightMessage(clientID, id) 352 if err != nil { 353 return err 354 } 355 c.removeFromInFlightPQ(msg) 356 if c.e2eProcessingLatencyStream != nil { 357 c.e2eProcessingLatencyStream.Insert(msg.Timestamp) 358 } 359 return nil 360 } 361 362 // RequeueMessage requeues a message based on `time.Duration`, ie: 363 // 364 // `timeoutMs` == 0 - requeue a message immediately 365 // `timeoutMs` > 0 - asynchronously wait for the specified timeout 366 // 367 // and requeue a message (aka "deferred requeue") 368 func (c *Channel) RequeueMessage(clientID int64, id MessageID, timeout time.Duration) error { 369 // remove from inflight first 370 msg, err := c.popInFlightMessage(clientID, id) 371 if err != nil { 372 return err 373 } 374 c.removeFromInFlightPQ(msg) 375 atomic.AddUint64(&c.requeueCount, 1) 376 377 if timeout == 0 { 378 c.exitMutex.RLock() 379 if c.Exiting() { 380 c.exitMutex.RUnlock() 381 return errors.New("exiting") 382 } 383 err := c.put(msg) 384 c.exitMutex.RUnlock() 385 return err 386 } 387 388 // deferred requeue 389 return c.StartDeferredTimeout(msg, timeout) 390 } 391 392 // AddClient adds a client to the Channel's client list 393 func (c *Channel) AddClient(clientID int64, client Consumer) error { 394 c.exitMutex.RLock() 395 defer c.exitMutex.RUnlock() 396 397 if c.Exiting() { 398 return errors.New("exiting") 399 } 400 401 c.RLock() 402 _, ok := c.clients[clientID] 403 numClients := len(c.clients) 404 c.RUnlock() 405 if ok { 406 return nil 407 } 408 409 maxChannelConsumers := c.nsqd.getOpts().MaxChannelConsumers 410 if maxChannelConsumers != 0 && numClients >= maxChannelConsumers { 411 return fmt.Errorf("consumers for %s:%s exceeds limit of %d", 412 c.topicName, c.name, maxChannelConsumers) 413 } 414 415 c.Lock() 416 c.clients[clientID] = client 417 c.Unlock() 418 return nil 419 } 420 421 // RemoveClient removes a client from the Channel's client list 422 func (c *Channel) RemoveClient(clientID int64) { 423 c.exitMutex.RLock() 424 defer c.exitMutex.RUnlock() 425 426 if c.Exiting() { 427 return 428 } 429 430 c.RLock() 431 _, ok := c.clients[clientID] 432 c.RUnlock() 433 if !ok { 434 return 435 } 436 437 c.Lock() 438 delete(c.clients, clientID) 439 numClients := len(c.clients) 440 c.Unlock() 441 442 if numClients == 0 && c.ephemeral { 443 go c.deleter.Do(func() { c.deleteCallback(c) }) 444 } 445 } 446 447 func (c *Channel) StartInFlightTimeout(msg *Message, clientID int64, timeout time.Duration) error { 448 now := time.Now() 449 msg.clientID = clientID 450 msg.deliveryTS = now 451 msg.pri = now.Add(timeout).UnixNano() 452 err := c.pushInFlightMessage(msg) 453 if err != nil { 454 return err 455 } 456 c.addToInFlightPQ(msg) 457 return nil 458 } 459 460 func (c *Channel) StartDeferredTimeout(msg *Message, timeout time.Duration) error { 461 absTs := time.Now().Add(timeout).UnixNano() 462 item := &pqueue.Item{Value: msg, Priority: absTs} 463 err := c.pushDeferredMessage(item) 464 if err != nil { 465 return err 466 } 467 c.addToDeferredPQ(item) 468 return nil 469 } 470 471 // pushInFlightMessage atomically adds a message to the in-flight dictionary 472 func (c *Channel) pushInFlightMessage(msg *Message) error { 473 c.inFlightMutex.Lock() 474 _, ok := c.inFlightMessages[msg.ID] 475 if ok { 476 c.inFlightMutex.Unlock() 477 return errors.New("ID already in flight") 478 } 479 c.inFlightMessages[msg.ID] = msg 480 c.inFlightMutex.Unlock() 481 return nil 482 } 483 484 // popInFlightMessage atomically removes a message from the in-flight dictionary 485 func (c *Channel) popInFlightMessage(clientID int64, id MessageID) (*Message, error) { 486 c.inFlightMutex.Lock() 487 msg, ok := c.inFlightMessages[id] 488 if !ok { 489 c.inFlightMutex.Unlock() 490 return nil, errors.New("ID not in flight") 491 } 492 if msg.clientID != clientID { 493 c.inFlightMutex.Unlock() 494 return nil, errors.New("client does not own message") 495 } 496 delete(c.inFlightMessages, id) 497 c.inFlightMutex.Unlock() 498 return msg, nil 499 } 500 501 func (c *Channel) addToInFlightPQ(msg *Message) { 502 c.inFlightMutex.Lock() 503 c.inFlightPQ.Push(msg) 504 c.inFlightMutex.Unlock() 505 } 506 507 func (c *Channel) removeFromInFlightPQ(msg *Message) { 508 c.inFlightMutex.Lock() 509 if msg.index == -1 { 510 // this item has already been popped off the pqueue 511 c.inFlightMutex.Unlock() 512 return 513 } 514 c.inFlightPQ.Remove(msg.index) 515 c.inFlightMutex.Unlock() 516 } 517 518 func (c *Channel) pushDeferredMessage(item *pqueue.Item) error { 519 c.deferredMutex.Lock() 520 // TODO: these map lookups are costly 521 id := item.Value.(*Message).ID 522 _, ok := c.deferredMessages[id] 523 if ok { 524 c.deferredMutex.Unlock() 525 return errors.New("ID already deferred") 526 } 527 c.deferredMessages[id] = item 528 c.deferredMutex.Unlock() 529 return nil 530 } 531 532 func (c *Channel) popDeferredMessage(id MessageID) (*pqueue.Item, error) { 533 c.deferredMutex.Lock() 534 // TODO: these map lookups are costly 535 item, ok := c.deferredMessages[id] 536 if !ok { 537 c.deferredMutex.Unlock() 538 return nil, errors.New("ID not deferred") 539 } 540 delete(c.deferredMessages, id) 541 c.deferredMutex.Unlock() 542 return item, nil 543 } 544 545 func (c *Channel) addToDeferredPQ(item *pqueue.Item) { 546 c.deferredMutex.Lock() 547 heap.Push(&c.deferredPQ, item) 548 c.deferredMutex.Unlock() 549 } 550 551 func (c *Channel) processDeferredQueue(t int64) bool { 552 c.exitMutex.RLock() 553 defer c.exitMutex.RUnlock() 554 555 if c.Exiting() { 556 return false 557 } 558 559 dirty := false 560 for { 561 c.deferredMutex.Lock() 562 item, _ := c.deferredPQ.PeekAndShift(t) 563 c.deferredMutex.Unlock() 564 565 if item == nil { 566 goto exit 567 } 568 dirty = true 569 570 msg := item.Value.(*Message) 571 _, err := c.popDeferredMessage(msg.ID) 572 if err != nil { 573 goto exit 574 } 575 c.put(msg) 576 } 577 578 exit: 579 return dirty 580 } 581 582 func (c *Channel) processInFlightQueue(t int64) bool { 583 c.exitMutex.RLock() 584 defer c.exitMutex.RUnlock() 585 586 if c.Exiting() { 587 return false 588 } 589 590 dirty := false 591 for { 592 c.inFlightMutex.Lock() 593 msg, _ := c.inFlightPQ.PeekAndShift(t) 594 c.inFlightMutex.Unlock() 595 596 if msg == nil { 597 goto exit 598 } 599 dirty = true 600 601 _, err := c.popInFlightMessage(msg.clientID, msg.ID) 602 if err != nil { 603 goto exit 604 } 605 atomic.AddUint64(&c.timeoutCount, 1) 606 c.RLock() 607 client, ok := c.clients[msg.clientID] 608 c.RUnlock() 609 if ok { 610 client.TimedOutMessage() 611 } 612 c.put(msg) 613 } 614 615 exit: 616 return dirty 617 }